Example #1
    def to_networkx_map(dataset, interactor_a, interactor_b, selfloop='n'):
        """ Create a graph from dataset by using NetworkX package
        Args:
            dataset (:obj: DataFrame from pandas): the Dataframe with header and at least two columns
            interactor_a (:obj: str): column for interactor A
            interactor_b (:obj: str): column for interactor B
            selfloop (:obj: str, optional): create a graph with or without selfloop, it should be 'y' or 'n'
        Returns:
            :obj:'Graph from networkx': the undirected graph from dataset
        Raises:
            ValueError if the column(s) is not found in the dataset
            ValueError if the selfloop is not 'y' or 'n'
            ValueError if the graph is not successfully created   
        """

        if (interactor_a not in dataset) or (interactor_b not in dataset):
            raise ValueError('the column(s) were not found in the dataset')
        elif selfloop not in ['y', 'n']:
            raise ValueError("selfloop should be 'y' or 'n'")
        else:
            nx_map = nx.from_pandas_dataframe(dataset, interactor_a, interactor_b)
            if selfloop == 'n':
                nx_map.remove_edges_from(nx_map.selfloop_edges())
            print("Retrieve the protein-protein interaction network with {} nodes".format(Parameter.num_of_node(nx_map)),
                  "and {} edges.".format(Parameter.num_of_edge(nx_map)))
            return nx_map
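A portability note before the remaining examples: everything on this page targets the NetworkX 1.x API. nx.from_pandas_dataframe was removed in NetworkX 2.0, where nx.from_pandas_edgelist is the replacement. A minimal sketch of the same call under both APIs, using an illustrative two-column DataFrame:

import networkx as nx
import pandas as pd

df = pd.DataFrame({'interactor_a': ['P1', 'P2'],
                   'interactor_b': ['P2', 'P3'],
                   'score': [0.9, 0.4]})

# NetworkX 1.x, as used throughout these examples
# G = nx.from_pandas_dataframe(df, 'interactor_a', 'interactor_b', edge_attr='score')

# NetworkX >= 2.0 equivalent
G = nx.from_pandas_edgelist(df, source='interactor_a', target='interactor_b', edge_attr='score')
print(G.number_of_nodes(), G.number_of_edges())  # 3 2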
Example #2
def readDGFrameFile(filename, interRRI_norRNA=1, support_read=3):
	fn_stat_dict = nested_dict()
	inter, intra = 0, 0
	with open(filename, 'r') as TXT:
		for line in TXT:
			line = line.strip()
			if not line or line.startswith('#'):
				continue
			arr = line.split('\t')
			if arr[1] == arr[5]:
				intra += 1
			else:
				inter += 1
	fn_stat_dict['inter'] = inter
	fn_stat_dict['intra'] = intra
	fn_stat_dict['all'] = intra + inter

	df = pd.read_csv(filename, header=0, sep='\t')
	df['type'] = ['intra' if i == j else 'inter' for i,j in zip(df['lchr'], df['rchr'])]
	df_inter_RRI = df[df['type']=='inter']
	nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr')
	fn_stat_dict['uniq RRI']  = len(nx_inter_RRI.edges())
	if interRRI_norRNA:
		df_inter_RRI = df_inter_RRI[(df_inter_RRI['ltype'].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI['rtype'].isin(['mRNA', 'lncRNA']))]
	df_inter_RRI = df_inter_RRI[df_inter_RRI['support']>=support_read]
	nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr')
	nx_inter_RRI_info_dict, G_largest = RRI_network_property2(nx_inter_RRI)
	for i,j in nx_inter_RRI_info_dict.items():
		fn_stat_dict[i] = j
	# fn_stat_df['sampling'] = ''
	fn_stat_df = pd.DataFrame(fn_stat_dict, index=[0])
	return fn_stat_df
Example #3
def compute_net_stats_on_read_hrg_pickle(orig_df, gn, metricx):
    with open(r"Results/{}_hstars.pickle".format(gn), "rb") as in_file:
        c = cPickle.load(in_file)
    print " ==> pickle file loaded"
    if isinstance(c, dict):
        if len(c.keys()) == 1:
            c = c.values()[0]  # we have k nx gobjects
        else:
            print c.keys()
    if len(orig_df.columns) >= 3:
        orig = nx.from_pandas_dataframe(orig_df,
                                        'src',
                                        'trg',
                                        edge_attr=['ts'])
    else:
        orig = nx.from_pandas_dataframe(orig_df, 'src', 'trg')

    # metrics.network_properties([orig], metricx, c, name=gn, out_tsv=False)
    ## --
    p = mp.Pool(processes=10)
    for j, gnx in enumerate(c):
        if isinstance(gnx, list):
            gnx = gnx[0]
        p.apply_async(metrics.network_properties,
                      args=(
                          [orig],
                          ['clust'],
                          gnx,
                          gn,
                          True,
                      ),
                      callback=collect_results)
    p.close()
    p.join()
    print(results)
Example #4
    def graph_from_edgedata(edgedata, attr='Weight', directed=True, connected_component=False):
        '''
        :param edgedata: the edge data
        :param attr: string or list; edge attribute column(s); if there are no weights, set attr=None
        :param directed: build a directed or an undirected graph
        :param connected_component: if True, return only the largest connected component
                                    (weakly connected for directed graphs); defaults to False

        :return: networkx.Graph or DiGraph
        '''
        if len(edgedata) < 1:
            if directed:
                return nx.DiGraph()
            else:
                return nx.Graph()

        if directed:
            graph = nx.from_pandas_dataframe(edgedata, 'Source', 'Target',
                                             edge_attr=attr, create_using=nx.DiGraph())
            if connected_component:
                #return the largest connected component
                graph = max(nx.weakly_connected_component_subgraphs(graph), key=len)
        else:
            graph = nx.from_pandas_dataframe(edgedata, 'Source', 'Target',
                                             edge_attr=attr, create_using=nx.Graph())
            if connected_component:
            graph = max(nx.connected_component_subgraphs(graph), key=len)

        print('Directed Graph :', graph.is_directed())
        return graph
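A hypothetical call (the function is defined inside a class here, so assume it is exposed at module level), with an edge table carrying the Source, Target and Weight columns it expects:

import pandas as pd

edgedata = pd.DataFrame({'Source': ['a', 'b', 'c'],
                         'Target': ['b', 'c', 'a'],
                         'Weight': [1.0, 2.0, 0.5]})

g = graph_from_edgedata(edgedata, attr='Weight', directed=True)
print(g['a']['b']['Weight'])  # 1.0, taken from the Weight column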
Example #5
def get_prod_rules(data_frame, nbr_blocks):
    df = data_frame
    nb = int(nbr_blocks)
    chunked_graphs_lst = []
    if nb:
        slice = int((df.ts.max() - df.ts.min()) / nb)
    WG = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    pos = nx.spring_layout(WG)

    for blk in range(df.ts.min(), df.ts.max(), slice):
        mask = (df['ts'] >= blk) & (df['ts'] <= blk + slice)
        ldf = df.loc[mask]
        G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])
        chunked_graphs_lst.append(G)
    prules = derive_prules_from(chunked_graphs_lst)
    df = pd.DataFrame(columns=['rid', 'lhs', 'rhs', 'p'])

    for k, r in enumerate(prules):
        #print "{}: {}".format(k, [x for x in r if 'S' in x])# [len(x) for x in lhs if 'S' in x])
        # df = pd.concat ([df, pd.DataFrame([x for x in r], columns=['rid','lhs', 'rhs','p'])])
        bdf = pd.DataFrame([x for x in r], columns=['rid', 'lhs', 'rhs', 'p'])
        bdf['lcnt'] = bdf['lhs'].apply(lambda x: len(x))
        bdf['rcnt'] = bdf['rhs'].apply(lambda x: len(x))
        df = pd.concat([df, bdf])
        break

    print df.head()
    # print 'size of the rhs'[len(x) for x in df[df['lhs']=='S']['rhs']]
    # tdf = df[['lhs','rhs']].apply(lambda x: [len(r) for r in x])
    # tdf.columns=['lcnt','rcnt']
    # df =pd.concat([df,tdf],axis=1)
    # print df[['lcnt','rcnt']].describe()
    # # df.boxplot(['lcnt','rcnt'])
    # df.boxplot(by=['lhs','rhs'], notch=True)
    # # ax.set_xticks(range(10))
    df.plot.hist()
    plt.savefig('/tmp/outfig', bbox_inches='tight')
    exit()

    ptsg.plot_timestamped_graphs(chunked_graphs_lst,
                                 pos=pos,
                                 outfigname="tmp1")

    chunked_graphs_lst = []
    for blk in range(df.ts.min(), df.ts.max(), slice):
        mask = (df['ts'] <= blk + slice)
        ldf = df.loc[mask]
        G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])
        chunked_graphs_lst.append(G)
    # plot
    ptsg.plot_timestamped_graphs(chunked_graphs_lst,
                                 pos=pos,
                                 outfigname="tmp2")

    if 0:
        print
        for k, pr in enumerate(prules):  ## print enum rules
            print "{}\t{}".format(k, pr)
Example #6
def get_hrg_production_rules(edgelist_data_frame, graph_name):
    from growing import derive_prules_from

    df = edgelist_data_frame
    try:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    except Exception, e:
        print '==========================\n\t',
        print str(e)
        traceback.print_exc()
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
Example #7
 def __init__(self, check_ins, friends, self_check=True):
     self.uid = check_ins['uid'].unique()
     self.locid = check_ins['locid'].unique()
     self.ul_graph = nx.from_pandas_dataframe(check_ins, 'uid', 'locid',
                                              ['weight'])
     self.uu_graph = nx.from_pandas_dataframe(friends, 'u1', 'u2')
     if self_check:
         uid_cmp = set(friends['u1'].unique()) | set(friends['u2'].unique())
         assert uid_cmp.issubset(set(self.uid))
         logging.debug('self check pass %d/%d' %
                       (len(uid_cmp), len(set(self.uid))))
Example #8
def read_dir(dir='/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-5-rep-combine/downsampling_N', to_dgframe=0, get_inter_intra=1, read_nx=1, interRRI_norRNA=1, support_read=3):
	fn_ls = os.listdir(dir)
	# print fn_ls

	fn_stat_dict = nested_dict()
	downsampling_N_draw = dir + '.subnetwork.draw.pdf'
	fig,ax=plt.subplots(10,1)
	for n,fn in enumerate(fn_ls):
		print "process: %s"%(fn)
		dfFile = dir + '/' + fn + '/' + '27-DG'
		frameFile = dfFile + '.txt'
		if to_dgframe:
			paris_dg2frame.DG2Frame(dfFile=dfFile, frameFile=frameFile)
		if get_inter_intra:
			inter, intra = 0, 0
			with open(frameFile, 'r') as TXT:
				for line in TXT:
					line = line.strip()
					if not line or line.startswith('#'):
						continue
					arr = line.split('\t')
					if arr[1] == arr[5]:
						intra += 1
					else:
						inter += 1
			fn_stat_dict[fn]['inter'] = inter
			fn_stat_dict[fn]['intra'] = intra
			fn_stat_dict[fn]['all'] = intra + inter
		if read_nx:
			df = pd.read_csv(frameFile, header=0, sep='\t')
			df['type'] = ['intra' if i == j else 'inter' for i,j in zip(df['lchr'], df['rchr'])]
			df_inter_RRI = df[df['type']=='inter']
			nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr')
			fn_stat_dict[fn]['uniq RRI']  = len(nx_inter_RRI.edges())
			if interRRI_norRNA:
				df_inter_RRI = df_inter_RRI[(df_inter_RRI['ltype'].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI['rtype'].isin(['mRNA', 'lncRNA']))]
			df_inter_RRI = df_inter_RRI[df_inter_RRI['support']>=support_read]
			nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr')
			nx_inter_RRI_info_dict, G_largest = RRI_network_property2(nx_inter_RRI)
			for i,j in nx_inter_RRI_info_dict.items():
				fn_stat_dict[fn][i] = j
			# fn_stat_dict[fn]['uniq RRI']  = len(nx_inter_RRI.edges())
			if n < 10:
				draw_graph(G_largest, ax=ax[n])
	plt.savefig(downsampling_N_draw)
	savefn = dir + '.stat.txt'
	fn_stat_df = pd.DataFrame.from_dict(fn_stat_dict)
	fn_stat_df = fn_stat_df.T
	fn_stat_df['sampling'] = fn_stat_df.index
	print fn_stat_df.head()

	fn_stat_df.to_csv(savefn, header=True, index=False, sep='\t')

	return fn_stat_df
Example #9
def set_attributes(dataframe, attribute_dataframe, graph_type='dir'):
    """ Returns a network with attributes assigned to each node
        Input parameters:
        1. dataframe - edge list
        2. attribute_dataframe - contains node id, and attributes (name, party, nationality, occupation, gender)
        3. graph_type - 'dir' or 'undir'
    """

    # load dataframe as graph
    if graph_type == 'dir':
        G = nx.from_pandas_dataframe(dataframe,
                                     'from',
                                     'to',
                                     edge_attr=False,
                                     create_using=nx.DiGraph())
    elif graph_type == 'undir':
        G = nx.from_pandas_dataframe(dataframe,
                                     'from',
                                     'to',
                                     edge_attr=False,
                                     create_using=nx.Graph())


    # G = nx.from_pandas_dataframe(dataframe, 'from', 'to')
    # get list of nodes
    node_list = G.nodes()
    # create dictionaries
    data = attribute_dataframe[attribute_dataframe["ID"].isin(node_list)]
    name_data = data[["ID", "name"]].set_index('ID')['name'].to_dict()
    gender_data = data[["ID", "gender"]].set_index('ID')['gender'].to_dict()
    occupation_data = data[["ID", "occupation"
                            ]].set_index('ID')['occupation'].to_dict()
    nationality_data = data[["ID", "nationality"
                             ]].set_index('ID')['nationality'].to_dict()
    party_data = data[["ID", "party"]].set_index('ID')['party'].to_dict()
    birth = data[["ID", "birthDate"]].set_index('ID')['birthDate'].to_dict()
    death = data[["ID", "deathDate"]].set_index('ID')['deathDate'].to_dict()

    # set attributes
    nx.set_node_attributes(G, 'gender', gender_data)
    nx.set_node_attributes(G, 'name', name_data)
    nx.set_node_attributes(G, 'occupation', occupation_data)
    nx.set_node_attributes(G, 'nationality', nationality_data)
    nx.set_node_attributes(G, 'party', party_data)
    nx.set_node_attributes(G, 'birthDate', birth)
    nx.set_node_attributes(G, 'deathDate', death)

    #print stuff
    num_n = len(G.nodes())
    num_e = len(G.edges())
    print("Number of nodes: ", num_n)
    print("Number of edges: ", num_e)

    return G, num_n, num_e
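A portability note: this example uses the NetworkX 1.x argument order for nx.set_node_attributes. NetworkX 2.0 swapped the name and values arguments, so the same calls become:

# NetworkX 1.x, as in this example
# nx.set_node_attributes(G, 'gender', gender_data)

# NetworkX >= 2.0
# nx.set_node_attributes(G, gender_data, name='gender')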
Example #10
 def test_from_dataframe_all_attr(self, ):
     Gtrue = nx.Graph([('E', 'C', {'cost': 9, 'weight': 10}),
                       ('B', 'A', {'cost': 1, 'weight': 7}),
                       ('A', 'D', {'cost': 7, 'weight': 4})])
     G = nx.from_pandas_dataframe(self.df, 0, 'b', True)
     self.assert_equal(G, Gtrue)
     # MultiGraph
     MGtrue = nx.MultiGraph(Gtrue)
     MGtrue.add_edge('A', 'D', cost=16, weight=4)
     MG = nx.from_pandas_dataframe(self.mdf, 0, 'b', True, nx.MultiGraph())
     self.assert_equal(MG, MGtrue)
Example #11
def Hstar_Graphs_Ignore_Time(df, graph_name, tslices, axs):
    if len(df.columns) == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr='ts')
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    # force to unrepeated edges
    if 0: print nx.info(G)
    G = G.to_undirected()
    if 0: print nx.info(G)
    exit()
    # Derive the prod rules in a naive way, where
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()

    print "Starting max size"
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #   g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #   draw_degree_whole_graph(g,axs)
    #   draw_degree(Hstars, axs=axs, col='r')
    #   #axs.set_title('Rules derived by ignoring time')
    #   axs.set_ylabel('Frequency')
    #   axs.set_xlabel('degree')

    if 1:
        # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd']
        metricx = ['eigen']
        g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        print ">", graph_name
        metrics.network_properties([g],
                                   metricx,
                                   Hstars,
                                   name=graph_name,
                                   out_tsv=True)
Example #12
File: MIA.py Project: nd7141/PIMUS
def read_graph(filename, directed=True, sep=' ', header=None):
    """
    Create a networkx graph using pandas.
    :param filename: file with one edge (u, v) per line
    :param directed: boolean
    :param sep: separator in file
    :return: networkx graph
    """
    df = pd.read_csv(filename, sep=sep, header=header)
    if directed:
        G = nx.from_pandas_dataframe(df, 0, 1, create_using=nx.DiGraph())
    else:
        G = nx.from_pandas_dataframe(df, 0, 1)
    return G
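Because the file is read with header=None, pandas labels the columns with the integers 0 and 1, which is why those are passed as the source and target column names. A hypothetical call, assuming a space-separated file with one edge per line:

# edges.txt (illustrative contents):
# 1 2
# 2 3
G = read_graph('edges.txt', directed=False)
print(G.number_of_edges())  # 2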
Example #13
def net_info(edgelist_fname):
    dfs = Pandas_DataFrame_From_Edgelist([edgelist_fname])
    df = dfs[0]

    try:
        g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    except Exception:
        g = nx.from_pandas_dataframe(df, 'src', 'trg')

    if df.empty:
        g = nx.read_edgelist(edgelist_fname, comments="%")
    gn = graph_name(edgelist_fname)

    return (gn, g.number_of_nodes(), g.number_of_edges())
Example #14
 def test_from_dataframe_all_attr(self,):
     Gtrue = nx.Graph(
         [
             ("E", "C", {"cost": 9, "weight": 10}),
             ("B", "A", {"cost": 1, "weight": 7}),
             ("A", "D", {"cost": 7, "weight": 4}),
         ]
     )
     G = nx.from_pandas_dataframe(self.df, 0, "b", True)
     self.assert_equal(G, Gtrue)
     # MultiGraph
     MGtrue = nx.MultiGraph(Gtrue)
     MGtrue.add_edge("A", "D", cost=16, weight=4)
     MG = nx.from_pandas_dataframe(self.mdf, 0, "b", True, nx.MultiGraph())
     self.assert_equal(MG, MGtrue)
Example #15
def read_graph_OLD(filename, directed=True, sep=' ', header=None):
    """
    Create a networkx graph using pandas.
    :param filename: file with one edge (u, v) per line
    :param directed: boolean
    :param sep: separator in file
    :return: networkx graph
    """
    df = pd.read_csv(filename, sep=sep, header=header)
    if directed:
        G = nx.from_pandas_dataframe(df, 0, 1, create_using=nx.DiGraph())
    else:
        G = nx.from_pandas_dataframe(df, 0, 1)
    print('Read graph')
    return G
Example #16
def graph_from_pathway(pathway):
    edge_list = pathway[['#tail', 'head']]
    g = nx.from_pandas_dataframe(edge_list,
                                 '#tail',
                                 'head',
                                 create_using=nx.DiGraph())
    return (g)
Example #17
def make_graph(edgesdf, edge_attr='fdist', name=None):
	g = nx.from_pandas_dataframe(edgesdf, source='node1',
								 target='node2', edge_attr=edge_attr)
	if name:
		g.add_node('namenode', name=name)

	return g
Example #18
def Structure_Varying_Overtime(df, hrBlck, axs):
    # import datetime
    # red_patch = mpatches.Patch(color='red', label='uniq nodes')
    # blu_patch = mpatches.Patch(color='blue', label='edges')
    print '{} hr'.format(hrBlck)
    dat = {}
    clqs = {}
    agg_hrs = 0
    for s in range(df['ts'].min(), df['ts'].max(), int(3600 * hrBlck)):
        mask = (df['ts'] >= s) & (df['ts'] < s + 3600 * hrBlck)
        tdf = df.loc[mask]
        agg_hrs += hrBlck
        SG = nx.from_pandas_dataframe(tdf, 'src', 'trg', ['ts'])
        dat[agg_hrs] = np.mean(SG.degree().values())
        cliq = nx.find_cliques(SG)
        clqs[agg_hrs] = np.mean([len(c) for c in cliq])
    xvals = sorted(dat.keys())
    #print [datetime.datetime.fromtimestamp(d).strftime("%d/%m") for d in xvals]
    yvals = [dat[x] for x in xvals]
    axs.plot(xvals, yvals, '.', linestyle="-", label='Avg degree')
    # Save to disk the needed files
    with open("Results/avg_degree_structure_in_{}hrs.tsv".format(hrBlck),
              'w') as f:
        for k in range(0, len(yvals)):
            f.write("({},{})\n".format(xvals[k], yvals[k]))

    yvals = [clqs[x] for x in xvals]
    axs.plot(xvals, yvals, '.', linestyle="-", label="Avg clique size")
    axs.set_xlabel('hours')
    # Save to disk the needed files
    with open("Results/avg_cliq_size_structure_in_{}hrs.tsv".format(hrBlck),
              'w') as f:
        for k in range(0, len(yvals)):
            f.write("({},{})\n".format(xvals[k], yvals[k]))
    return
Example #19
def simm2net(simm):
    stacked = simm.stack()
    sliced = stacked[stacked >= 0.7]
    net = nx.from_pandas_dataframe(sliced.reset_index(),
                                   "level_0", "level_1")
    net.add_nodes_from(att_mtx.columns)
    return net
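The stack() call flattens the similarity matrix into one entry per (row, column) pair, the threshold keeps only similar pairs, and reset_index() then exposes the pair as columns 'level_0' and 'level_1'. (Note that att_mtx is a global defined elsewhere in the original project.) A self-contained sketch of the thresholding step, with illustrative values:

import pandas as pd
import networkx as nx

# a tiny symmetric similarity matrix
simm = pd.DataFrame([[1.0, 0.8, 0.2],
                     [0.8, 1.0, 0.9],
                     [0.2, 0.9, 1.0]],
                    index=list('ABC'), columns=list('ABC'))

stacked = simm.stack()            # MultiIndex Series over all (row, column) pairs
sliced = stacked[stacked >= 0.7]  # keep only sufficiently similar pairs
net = nx.from_pandas_dataframe(sliced.reset_index(), 'level_0', 'level_1')
print(net.number_of_edges())      # 5: A-B, B-C, plus the three diagonal self-loops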
Example #20
def generate_network_plot_univ(network_data, label):
    """
    :param network_data: edge list with 'from' and 'to' columns
    :param label: whether to draw node labels
    :return:
    """

    # Create new column with number of publications by authors
    network_data['count'] = ''

    n_publications = network_data['from'].value_counts()

    # Set the number of publications to each author
    for name in n_publications.index:
        network_data.loc[(network_data['from'] == name), 'count'] = int(
            n_publications[n_publications.index == name].values)

    # Build your graph
    G = nx.from_pandas_dataframe(network_data, 'from', 'to')

    # labels = {}
    #
    # for node in G.nodes():
    #     if node in list(network_data['label'].dropna().values):
    #         labels[node] = node

    # Plot it
    nx.draw(G,
            with_labels=label,
            node_size=list(network_data['count'].values * 10))
    plt.show()
Example #21
    def _individual_measures(self):

        print("Running NX analysis for {0} and timeframe length {1}".format(self._owner,
                                                                            conf.a_length_timeframe))
        res_louvain = {}
        res_degree_centrality = {}
        res_betweenness_centrality = {}
        res_eigenvector_centrality = {}
        res_modularity = {}

        time_start = time.time()
        for dt in rrule.rrule(rrule.WEEKLY, dtstart=self._startdt, until=self._enddt):
            lap_time = time.time()

            links = self._controller.get_communication_subgraph(self._owner, dt)

            if not links.empty:
                multi_graph = nx.from_pandas_dataframe(links,
                                                       source="source",
                                                       target="target",
                                                       create_using=nx.MultiGraph())

                if conf.a_louvain:
                    partition = nxlouvain.best_partition(multi_graph)
                    res_louvain[dt.strftime("%Y-%m-%d")] = partition

                if conf.a_betweenness_centrality:
                    bc = nx.betweenness_centrality(multi_graph, normalized=True)
                    res_betweenness_centrality[dt.strftime("%Y-%m-%d")] = bc

                if conf.a_degree_centrality:
                    dc = nx.degree_centrality(multi_graph)
                    res_degree_centrality[dt.strftime("%Y-%m-%d")] = dc

                if conf.a_modularity:
                    mod = nxlouvain.modularity(partition, multi_graph)
                    res_modularity[dt.strftime("%Y-%m-%d")] = mod

                if conf.a_eigenvector_centrality:
                    pass
                    # simple_graph = self.convert_to_simple(multi_graph)

                    # TODO: eigenvector centrality calculation fails occasionally
                    # reason may be that nx.eigenvector_centrality() can't handle star graphs
                    # https://stackoverflow.com/questions/43208737/using-networkx-to-calculate-eigenvector-centrality
                    # ec = nx.eigenvector_centrality(simple_graph)
                    # res_eigenvector_centrality[dt.strftime("%Y-%m-%d")] = ec

            if conf.output_verbose:
                print("current: {0} - time: {0:.2f}s".format(dt.date(), time.time() - lap_time))

        print("{0:.2f}s".format(time.time()-time_start))
        print()

        self._modularity = res_modularity
        self._degree_centrality = res_degree_centrality
        self._betweenness_centrality = res_betweenness_centrality
        self._eigenvector_centrality = res_eigenvector_centrality
        self._partition = res_louvain
Example #22
def createGraph(fileUrl):
    df = pd.read_csv(fileUrl, sep=',')  #,error_bad_lines=False
    G = nx.from_pandas_dataframe(df,
                                 source='Source',
                                 target='Target',
                                 edge_attr='weight')
    return G
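A hypothetical call, assuming a comma-separated file with Source, Target and weight columns:

G = createGraph('edges.csv')
# each edge carries the 'weight' attribute taken from the CSV
print(nx.info(G))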
Example #23
    def build_org_networks(self):
        """
        Build graphs for connected users where applicable

        Note degree and children will have high correlation
        """
        self.graphs = {}
        for group, df in self.groups['org_id']:
            graph_df = df[['invited_by_user_id', 'object_id'
                           ]][df.invited_by_user_id.notnull()].astype(int)
            if graph_df.shape[0] > 1:
                graph = nx.from_pandas_dataframe(graph_df.astype(int),
                                                 'invited_by_user_id',
                                                 'object_id')
                self.graphs[group] = graph
                degrees = np.array(graph.degree().items()).astype(int)
                self.dfs['users']['degree'].loc[degrees[:, 0]] = degrees[:, 1]
                for g in nx.connected_component_subgraphs(graph):
                    nodes = g.nodes()
                    lenNodes = len(nodes)
                    if lenNodes > 3:
                        self.dfs['users']['local_rank'].loc[nodes] = len(nodes)

        self.dfs['users']['children'] = self.dfs['users']['degree'] -\
                                        self.dfs['users']['invited_by_user_id'].notnull()
Example #24
def read_load_graph(fname):
    df = Pandas_DataFrame_From_Edgelist([fname])[0]
    G = nx.from_pandas_dataframe(df, source='src', target='trg')
    Gc = max(nx.connected_component_subgraphs(G), key=len)
    gname = graph_name(fname)
    Gc.name = gname
    return Gc
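nx.connected_component_subgraphs, used here to keep the giant component, was removed in NetworkX 2.4. A sketch of the same extraction under both APIs:

# NetworkX 1.x, as in this example
# Gc = max(nx.connected_component_subgraphs(G), key=len)

# NetworkX >= 2.4
# Gc = G.subgraph(max(nx.connected_components(G), key=len)).copy()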
Example #25
def MakeLFN(flow_years, nrows=None, manual_filepath=None, print_info=True):
    '''
    Make a Labour Flow Network where one job change is sufficient for a link.

    Args:
        - flow_years: string of the form 'year1-year2' selecting which flow files to read
        - nrows: option to restrict the number of rows of flows read in;
           this allows for the creation of a smaller graph for testing.
        - manual_filepath: optionally override the default flows file location
    Returns:
        - networkx graph

    '''
    #note that pandas deals with the header automatically so there
    # is no need to do skiprows=1
    df = MakeFlowsDF(flow_years, manual_filepath=manual_filepath, nrows=nrows)
    g = nx.from_pandas_dataframe(
                                df,
                                source='from_firm',
                                target='to_firm',
                                edge_attr=None #take care with edge_attr='Number'
                                # it will record only the second instance of the
                                # edge (ie it will only see one flow direction)
                                )
    #set graph name to be of the form 'Flows: year1-year2'
    graph_name = 'LFN with flows: ' + flow_years + '.'
    if nrows is not None:
        graph_name = ' '.join([graph_name, 'Warning: Only used first', str(nrows),
        'of flows from each year\'s csv file (eg, if 1996-1997 then nrows will',
        'have been used, but if 1996-1998 then 3*nrows will have been used).'])
    g.name = graph_name
    if print_info==True:
        print(nx.info(g))
    return g
Example #26
def load_node_data(df_adj_list, df_annotations, df_unlabelled):
    network = from_pandas_dataframe(df_adj_list,
                                    source="User_ID_x",
                                    target="User_ID_y",
                                    edge_attr=["Weights"])

    for label in df_annotations.columns.values:
        set_node_attributes(network,
                            values=pd.Series(
                                df_annotations[label],
                                index=df_annotations.index).to_dict(),
                            name=label)

    labelled_node_train, stance_train = map(
        np.array,
        zip(*[([node], value["Stance"] == "Favour")
              for node, value in network.nodes(data=True)
              if value["Stance"] in ["Favour", "Against"]]))

    unlabelled_node = map(
        np.array,
        zip(*[([node]) for node, value in network.nodes(data=True)
              if value["Stance"] == "NONE"]))

    return dataset(labelled_node_train, stance_train, unlabelled_node, network)
Example #27
def plot_nbr_prod_rules_per_ts(pddf, axs, kthSlice, nSlices):
    span = (pddf['ts'].max() - pddf['ts'].min()) / nSlices
    mask = (pddf['ts'] >= pddf['ts'].min() + span * kthSlice) & (
        pddf['ts'] < pddf['ts'].min() + span * (kthSlice + 1))
    print pddf.shape
    pddf = pddf.loc[mask]
    print pddf.shape

    # sg = nx.from_pandas_dataframe(pddf, 'src', 'trg', ['ts'])
    # if 0: print nx.info(sg)
    # cliq=nx.find_cliques(sg)
    # print sorted((len(c) for c in cliq))

    gb = pddf.groupby(['ts']).groups
    ts_cliq_cnt = {}
    for k in gb.keys():
        df = pddf.loc[gb[k]]
        sg = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])
        cliq = nx.find_cliques(sg)
        ts_cliq_cnt[k] = [len(c) for c in cliq]

    df = pd.DataFrame.from_dict(ts_cliq_cnt.items(), dtype=np.int64)
    df['av'] = df[1].apply(lambda x: np.mean(x))

    df.sort_values(by=[0], inplace=True)
    # print df.head()
    # df['av'].plot(x=[0],ax=axs,color='b',alpha=0.75)
    axs.plot(df[0].values, df['av'].values, 'b', alpha=0.6)
    axs.set_xlabel('epochs')
    axs.set_ylabel('Avg Clique Length')

    return  # [_]
Example #28
def draw_weighted_graph(identifier, species=None, limit=100):
    result = creat_model_input(identifier, species, limit)
    G = nx.from_pandas_dataframe(result, 'interactor_A', 'interactor_B', [
        'score', 'nscore', 'fscore', 'pscore', 'hscore', 'ascore', 'escore',
        'dscore', 'tscore'
    ])
    elarge = [(u, v) for (u, v, d) in G.edges(data=True)
              if float(d['score']) > 0.7]
    esmall = [(u, v) for (u, v, d) in G.edges(data=True)
              if float(d['score']) <= 0.7]
    pos = nx.random_layout(G)  # positions for all nodes
    nx.draw_networkx_nodes(G, pos, node_size=80)  # nodes
    nx.draw_networkx_edges(G, pos, edgelist=elarge, width=0.5)
    nx.draw_networkx_edges(G,
                           pos,
                           edgelist=esmall,
                           width=0.5,
                           alpha=0.5,
                           edge_color='b',
                           style='dashed')
    nx.draw_networkx_labels(G, pos, font_size=3,
                            font_family='sans-serif')  # labels
    plt.axis('off')
    plt.savefig(identifier + "_weighted_graph.png", dpi=500)  # save as png
    plt.show()  # display
Example #29
def moran(data, *args):

    dist = pd.DataFrame(
        pd.read_csv(PATH +
                    '/processed/%s/distance_area/dist_matrix_adm%s.csv' %
                    (country, list(args[0])[-1])))
    response = data.groupby(args[0])[args[1]].mean().reset_index().dropna()

    non_missing = np.array(response[args[0]])
    dist = dist[dist['Source'].isin(non_missing)]
    dist = dist[dist['Target'].isin(non_missing)]

    G = nx.from_pandas_dataframe(dist, 'Source', 'Target', 'Distance_km')
    H = nx.adjacency_matrix(G, weight='Distance_km').todense()

    H[H > 500] = 0

    w = np.reciprocal(H + 1)
    response = np.array(response[args[1]])

    x_bar = np.mean(response)
    adms = len(w)
    c = np.zeros((adms, adms))
    for i in range(adms):
        for j in range(adms):
            c[i, j] = (response[i] - x_bar) * (response[j] - x_bar)

    mysum = np.sum(np.multiply(c, w))
    s2 = np.sum(np.power(response - x_bar, 2)) / adms
    sumw = np.sum(w)
    I = mysum / (s2 * sumw)
    print I
Example #30
def build_file_move_graph(file_frame):
    move_frame = file_frame[file_frame.move]
    move_graph = nx.from_pandas_dataframe(
        move_frame,
        source='file_path1', target='file_path2',
        edge_attr='hexsha', create_using=nx.DiGraph())
    return move_graph
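A hypothetical input frame, assuming one row per file change with a boolean move flag and the commit hexsha:

import pandas as pd

file_frame = pd.DataFrame({
    'file_path1': ['old/a.py', 'src/b.py'],
    'file_path2': ['new/a.py', 'src/b.py'],
    'hexsha':     ['c0ffee',   'deadbe'],
    'move':       [True,       False],
})

move_graph = build_file_move_graph(file_frame)
print(move_graph.edges(data=True))
# [('old/a.py', 'new/a.py', {'hexsha': 'c0ffee'})]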
Example #31
def build_graph(data):
    ### Construct graph G from df
    # Adding the weight to prepare for nx
    df = data.groupby(['A_NUMBER', 'B_NUMBER'])['DURATION'].sum().reset_index()
    df['DURATION_SEC'] = df['DURATION'] / np.timedelta64(1, 's')
    G = nx.from_pandas_dataframe(df, 'A_NUMBER', 'B_NUMBER', ['DURATION_SEC'])
    return G
Example #32
 def __init__(self, path):
     '''Constructor'''
     edges = pd.read_csv(path, header=0)
     self.l = edges.columns.values.tolist()
     self.l.remove('source')
     self.l.remove('target')
     self.G = nx.from_pandas_dataframe(edges, 'source', 'target', self.l, create_using=nx.DiGraph())
Example #33
def find_centrality(df, cent_type='betweenness', keep_thresh=0.5):
    df_b = df.copy()
    df_b[(df.abs() < keep_thresh)] = 0  #eliminate edges that are too weak
    labels = list(df_b.index)
    temp = abs(df_b.copy())
    temp.insert(0, 'var1', labels)
    df_b = pandas.melt(temp, 'var1', var_name='var2', value_name='edge')
    df_b = df_b.loc[(df_b['edge'] > 0), :]  # take only those edge pairs that made the cut
    df_g = networkx.from_pandas_dataframe(
        df_b, 'var1', 'var2', 'edge')  # takes a list of valid edges
    if cent_type == 'betweenness':
        centrality = networkx.betweenness_centrality(df_g)
    elif cent_type == 'degree':
        centrality = networkx.degree_centrality(df_g)
    elif cent_type == 'closeness':
        centrality = networkx.closeness_centrality(df_g)
    elif cent_type == 'eigenvector':
        centrality = networkx.eigenvector_centrality(df_g)
    else:
        print('error, unknown centrality')
        return -1
    centrality_df = pandas.DataFrame.from_dict(centrality, orient='index')
    centrality_df.sort_values(0, axis=0, ascending=False, inplace=True)
    centrality_df = centrality_df.transpose()

    return centrality_df, df_g
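A hypothetical call, assuming a square correlation DataFrame whose index and columns are the variable names (this example imports networkx and pandas under their full names):

import numpy
import pandas

corr = pandas.DataFrame(numpy.array([[1.0, 0.9, 0.1],
                                     [0.9, 1.0, 0.6],
                                     [0.1, 0.6, 1.0]]),
                        index=['x', 'y', 'z'], columns=['x', 'y', 'z'])

centrality_df, df_g = find_centrality(corr, cent_type='degree', keep_thresh=0.5)
print(centrality_df)  # one row of scores, most central variable first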
Example #34
def edgelist_dimacs_graph(orig_graph, peo_h, prn_tw=False):
    fname = orig_graph
    gname = os.path.basename(fname).split(".")
    gname = sorted(gname, reverse=True, key=len)[0]

    if ".tar.bz2" in fname:
        from tdec.read_tarbz2 import read_tarbz2_file
        edglst = read_tarbz2_file(fname)
        df = pd.DataFrame(edglst, dtype=int)
        G = nx.from_pandas_dataframe(df, source=0, target=1)
    else:
        G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int)
    # print "...",	G.number_of_nodes(), G.number_of_edges()
    # from numpy import max
    # print "...",	max(G.nodes()) ## to handle larger 300K+ nodes with much larger labels

    N = max(G.nodes())
    M = G.number_of_edges()
    # +++ Graph Checks
    if G is None: sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    # print "...",	G.number_of_nodes(), G.number_of_edges()
    if G.number_of_nodes() > 500 and not prn_tw:
        return (nx_edges_to_nddgo_graph_sampling(G, n=N, m=M,
                                                 peo_h=peo_h), gname)
    else:
        return (nx_edges_to_nddgo_graph(G, n=N, m=M, varel=peo_h), gname)
Example #35
def DataFrame_to_lG(df,
                    directed=False,
                    source='source',
                    target='target',
                    time='time',
                    weight=None):

    ti, tf = df[time].min(), df[time].max() + 1

    if directed:
        graph = nx.DiGraph
    else:
        graph = nx.Graph

    lG = []
    for t in range(ti, tf):

        cut = df[df[time] == t]
        if cut.shape[0] == 0:
            G = graph()
        else:
            G = nx.from_pandas_dataframe(cut,
                                         source=source,
                                         target=target,
                                         edge_attr=weight,
                                         create_using=graph())

        lG.append(G)

    return lG
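A hypothetical call, assuming integer time bins in a 'time' column; the function returns one graph per time step, with an empty graph for bins that have no edges:

import pandas as pd

df = pd.DataFrame({'source': [1, 1, 2],
                   'target': [2, 3, 3],
                   'time':   [0, 0, 2]})

lG = DataFrame_to_lG(df)
print([g.number_of_edges() for g in lG])  # [2, 0, 1]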
Example #36
def extract_groups(m2m):
    """Extracts a list of groups from a social network varying through time.
    
    Groups are defined as connected components of the social graph at a given
    time bin.
    
    Parameters
    ----------
    m2m : pd.DataFrame
        The social network, for instance, member-to-member bluetooth proximity
        data.  It must have the following columns: 'datetime', 'member1', and
        'member2'.
    
    Returns
    -------
    pd.DataFrame :
        The groups, as a sets of members with datetime.
    """
    groups = m2m.groupby('datetime').apply(lambda df: pd.Series([
        frozenset(c) for c in nx.connected_components(
            nx.from_pandas_dataframe(df.reset_index(), 'member1', 'member2'))
    ]))
    groups.name = 'members'

    return groups.reset_index()[['datetime', 'members']]
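A hypothetical input, assuming proximity records already binned on 'datetime':

import pandas as pd

m2m = pd.DataFrame({
    'datetime': ['10:00', '10:00', '10:05'],
    'member1':  ['a', 'b', 'a'],
    'member2':  ['b', 'c', 'c'],
})

groups = extract_groups(m2m)
# one row per (datetime, connected component):
#   10:00 -> frozenset({'a', 'b', 'c'})   a-b and b-c merge into one group
#   10:05 -> frozenset({'a', 'c'})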
Example #37
def init_simple():
	global max_depth
	global T
	global grid_dim_x
	global grid_dim_y
	global base
	global route_length
	global total_distance
	global total_reward

	total_distance = 0
	total_reward = 0
	T = 4 # Num. of iterations
	grid_dim_x = 1
	grid_dim_y = 2
	base = 0.0 # Starting node number
	route_length = 10 # Distance limit

	global graph
	global dist
	global sigma1
	global avg_strat1
	global regret1
	global route

	df = pd.read_csv("simple/nodes_list.txt", sep=" ")
	dist = pd.read_csv("simple/dist_simple.gop", sep=" ", header=None)
	graph = \
		nx.from_pandas_dataframe(df, source='node_from', target='node_to',
		                         edge_attr=['distance', 'animal_density', 'grid_cell_x', 'grid_cell_y'])
	sigma1 = [[1 / (grid_dim_x * grid_dim_y)] * grid_dim_y] * grid_dim_x
	avg_strat1 = [[0] * grid_dim_y] * grid_dim_x
	regret1 = [[0] * grid_dim_y] * grid_dim_x
	route = []
Example #38
def simple_visualization(airport_df, routes_df):
	if (airport_df is None) or (routes_df is None):
		print "Data cannot be retrieved and read"
	else:
		airport_us = airport_df[(airport_df.Country == "United States")][['Name','Lat', 'Long', 'IATA', 'ICAO']]
		us_airport_ix = airport_us.index.values
		routes_us = routes_df[(routes_df['Source Airport ID'].isin(us_airport_ix)) &
		                     (routes_df['Dest Airport ID'].isin(us_airport_ix))] #extract routes that fly from AND to USA
		routes_us =  pd.DataFrame(routes_us.groupby(['Source Airport', 'Dest Airport']).size().reset_index(name='counts'))
		# to find the number of flights in and out of an airport,
		# count the rows in which each airport occurs in either one of the 2 columns
		counts = routes_us['Source Airport'].append(routes_us.loc[routes_us['Source Airport'] != routes_us['Dest Airport'], 'Dest Airport']).value_counts()
		# create a data frame of position based on names in count
		counts = pd.DataFrame({'IATA': counts.index, 'total_flight': counts})
		pos_data = counts.merge(airport_us, on = 'IATA')

		# Create graph
		graph = nx.from_pandas_dataframe(routes_us, source = 'Source Airport', target = 'Dest Airport',
		                        edge_attr = 'counts',create_using = nx.DiGraph())

		# default graph using Networkx inbuilt graph tools
		plt.figure(figsize = (10,9))
		nx.draw_networkx(graph)
		plt.savefig("./images/networkx_basemap/map_0.png", format = "png", dpi = 300)
		plt.show()

		# Set up base map
		plt.figure(figsize=(15,20))
		m = Basemap(
		        projection='merc',
		        llcrnrlon=-180,
		        llcrnrlat=10,
		        urcrnrlon=-50,
		        urcrnrlat=70,
		        lat_ts=0,
		        resolution='l',
		        suppress_ticks=True)

		# import long lat as m attribute
		mx, my = m(pos_data['Long'].values, pos_data['Lat'].values)
		pos = {}
		for count, elem in enumerate (pos_data['IATA']):
		    pos[elem] = (mx[count], my[count])

		# draw nodes and edges and overlay on basemap
		nx.draw_networkx_nodes(G = graph, pos = pos, nodelist = graph.nodes(), node_color = 'r', alpha = 0.8,
		                       node_size = [counts['total_flight'][s]*3 for s in graph.nodes()])
		nx.draw_networkx_edges(G = graph, pos = pos, edge_color='g', width = routes_us['counts']*0.75, 
		                       alpha=0.2, arrows = False)

		m.drawcountries(linewidth = 3)
		m.drawstates(linewidth = 0.2)
		m.drawcoastlines(linewidth=3)
		plt.tight_layout()
		plt.savefig("./images/networkx_basemap/map_2.png", format = "png", dpi = 300)
		plt.show()
		print ("successful visualization")
		return 0
Example #39
 def test_from_dataframe_multi_attr(self,):
     Gtrue = nx.Graph(
         [
             ("E", "C", {"cost": 9, "weight": 10}),
             ("B", "A", {"cost": 1, "weight": 7}),
             ("A", "D", {"cost": 7, "weight": 4}),
         ]
     )
     G = nx.from_pandas_dataframe(self.df, 0, "b", ["weight", "cost"])
     self.assert_equal(G, Gtrue)
Example #40
File: words.py Project: anbasile/mwe
def calculate(words):
    # instantiate a dictionary to later be filled with word:miscores
    wc = defaultdict(float)
    frames = []
    print("...it will take a while. Wait a sec...")
    for word in words:
        payload = {'searchstring': word.encode('ascii'),
                   'searchpositional':'word',
                   'searchpostag':'all',
                   'contextsize':'60c',
                   'sort2':'right',
                   'terminate':'100',
                   'searchtype':'coll',
                   'mistat':'on',
                   'collocspanleft':'2',
                   'collocspanright':'2',
                   'collocfilter':'noun'}

        r = requests.get("http://clic.cimec.unitn.it/cgi-bin/cqp/cqp.pl?corpuslist=WEBBIT", params=payload)
        soup = BeautifulSoup(r.content, 'lxml')

        # parse the html table and extract words and miscores. Add scores
    
        temp = []
        for tr in soup.find_all('tr')[1:]:
            tds = tr.find_all('td')
            word = tds[0].text.split('~~')[1]
            mi = float(tds[4].text)
            wc[word] += mi
            temp.append(map(lambda x:x.text,tds[0:]))
        x = pd.DataFrame(temp)
        df = pd.DataFrame()
        df['coll'] = x.ix[0:,0].apply(lambda x: x.split('~~')[1])
        df['word'] = x.ix[0:,0].apply(lambda x: x.split('~~')[0])
        df['mi'] = x.ix[0:,4]
        frames.append(df)

    #sort the results in decreasing order        
    results = []
    for w in sorted(wc, key=wc.get, reverse=True):
        results.append((w, wc[w]))

    #spit out the top result. If using ipython you can check the rest of the list by typing `results`
    #viz part
    results_df = pd.concat(frames)

    G=nx.from_pandas_dataframe(results_df, 'word','coll',['mi'])
    mat = nx.adjacency_matrix(G).todense()
    viz = lgn.force(mat)
    vid = viz.id
    print(vid)
    url = '<iframe src="http://public.lightning-viz.org/visualizations/'+vid+'/iframe/" width=100% height=400px>'
    return (results[0][0].strip(),url)
Example #41
def make_graph(cutdf):
    """
    Convert dataframe of a list of edges into a networkx graph object

    Args:
        cutdf : pandas dataframe containing list of edges with
            features 'source' and 'target' containing node numbers
    Returns:
        networkx graph object
    """
    g = nx.from_pandas_dataframe(cutdf, 'source', 'target')
    return g
Example #42
    def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
        if len(self.mol.get('resid', 'name CA')) != len(self.resids):
            raise Exception('The length of the protein doesn\'t match the Mutual Information data')
        contactcat = np.concatenate(datacontacts.dat)
        contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
        for i in range(contactcat.shape[1]):
            counter = np.count_nonzero(contactcat[:, i])
            resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
            resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
            contacts_matrix[resid1][resid2] = counter

        self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
        mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
        self.graph_array[mask] = self.mi_matrix[mask]

        intermed = []
        for source in range(self.graph_array.shape[0]):
            for target in range(source, self.graph_array.shape[1]):
                if self.graph_array[source, target] != 0 and target > source:
                    intermed.append(
                        [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])
        import pandas as pd
        import networkx as nx
        from sklearn.cluster.spectral import SpectralClustering

        pd = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
        pd[['source', 'target']] = pd[['source', 'target']].astype(type('int', (int,), {}))
        pd['weight'] = pd['weight'].astype(type('float', (float,), {}))
        G = nx.from_pandas_dataframe(pd, 'source', 'target', ['weight'])
        ## setSegment
        segids = self.mol.get('segid', 'name CA')
        seg_res_dict = {key: value for (key, value) in zip(self.resids, segids) if
                        np.any(pd.loc[(pd['source'] == key)].index) or np.any(pd.loc[(pd['target'] == key)].index)}
        nx.set_node_attributes(G, 'Segment', seg_res_dict)
        ## set
        if not nx.is_connected(G):
            G = max(nx.connected_component_subgraphs(G), key=len)
        flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
        nx.set_node_attributes(G, 'flowcent', flow_cent)
        Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
        model = Spectre.fit_predict(self.graph_array)
        model = model.astype(type('float', (float,), {}))
        spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
        nx.set_node_attributes(G, 'spectral', spectral_dict)
        self.graph = G
Example #43
def corr_to_graph(roi_corrs, copy_corrs=False):
    """
    >>> import pandas as pd
    >>> import numpy as np
    >>> corrs = pd.DataFrame(np.random.rand(2,2))
    >>> corrs.index = ['A', 'B']
    >>> corrs.columns = ['A', 'B']
    >>> graph = corr_to_graph(corrs)
    >>> ab = graph['A']['B']
    >>> wt, prox, dist = ab['weight'], ab['proximity'], ab['distance']
    >>> assert wt == corrs['B']['A'] #upper triangular
    >>> assert prox == wt
    >>> assert dist == 1 - wt
    >>> assert len(graph) == 2
    """
    roi_corrs = create_convertible_corr_df(roi_corrs, copy_corrs)
    return nx.from_pandas_dataframe(roi_corrs, 'source', 'target',
                                    edge_attr=['distance', 'proximity', 'weight'])
Example #44
 def test_from_datafram(self, ):
     # Pandas DataFrame
     g = nx.cycle_graph(10)
     G = nx.Graph()
     G.add_nodes_from(g)
     G.add_weighted_edges_from((u, v, u) for u, v in g.edges())
     edgelist = nx.to_edgelist(G)
     source = [s for s, t, d in edgelist]
     target = [t for s, t, d in edgelist]
     weight = [d['weight'] for s, t, d in edgelist]
     import pandas as pd
     edges = pd.DataFrame({'source': source,
                           'target': target,
                           'weight': weight})
     GG = nx.from_pandas_dataframe(edges, edge_attr='weight')
     assert_nodes_equal(sorted(G.nodes()), sorted(GG.nodes()))
     assert_edges_equal(sorted(G.edges()), sorted(GG.edges()))
     GW = nx.to_networkx_graph(edges, create_using=nx.Graph())
     assert_nodes_equal(sorted(G.nodes()), sorted(GW.nodes()))
     assert_edges_equal(sorted(G.edges()), sorted(GW.edges()))
Example #45
def read_inter_RRI(inter_RRI=None, filter_rRNA=False, support_read_num=2, only_mRNA_lncRNA=False):
	if inter_RRI is None:
		inter_RRI = '/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-2/27-DG.inter.element.txt'
	df_inter_RRI = pd.read_csv(inter_RRI, header=None, sep='\t')
	if filter_rRNA:
		df_inter_RRI = df_inter_RRI[(df_inter_RRI[13] != 'rRNA') & (df_inter_RRI[14] != 'rRNA')]
	if only_mRNA_lncRNA:
		only_mRNA_lncRNA_index = (df_inter_RRI[13].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI[14].isin(['mRNA', 'lncRNA']))
		df_inter_RRI = df_inter_RRI[only_mRNA_lncRNA_index]
	header_ls = ['Group', 'lchr', 'lstrand', 'lstart', 'lend', 'rchr', 'rstrand',
				'rstart', 'rend', 'support', 'lcount', 'rcount', 'score', 'ltype', 'rtype', 'RRI_type', 'lcontext', 'rcontext']
	df_inter_RRI.columns = header_ls
	df_inter_RRI = df_inter_RRI[df_inter_RRI['support'] >= support_read_num]
	print df_inter_RRI.head()
	nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr', edge_attr=['support', 'Group'])
	print "\nread: %s"%(inter_RRI)
	print nx.info(nx_inter_RRI)
	print

	return nx_inter_RRI, df_inter_RRI
Example #46
    def create_nwrk(self,
                    nodes_cols,
                    attribs_cols):
        """Return Ortho_Network.nwrk upon pandas.DataFrame.

        Parameters
        ----------
        nodes_cols: list
            Columns to take as nodes.
        attribs_cols: list
            Columns to take as attributes.

        Returns
        -------
        P_CRAWLER.Ortho_Network.nwrk
            Interactions-based network, a networkx.classes.graph.Graph derivative.
        """
        self.nwrk = nx.from_pandas_dataframe(self.inter_df,
                                             nodes_cols[0],
                                             nodes_cols[1],
                                             attribs_cols)
Example #47
def main():
    parser = ArgumentParser()
    parser.add_argument("-i", "--infile", type=str,
            help="Table with one row per link between families, columns: ['Family1','Family2'].\
                    If not specified the program reads from stdin.")
    parser.add_argument("--maxlink", default=6, type=int,
            help="Maximum number of allowed outgoing links (edges) for a single protein family. Defaults to 6")
    parser.add_argument("--minoc", default=10, type=int,
            help="Minimum number of occurrences for linking two families. Defaults to 10")
    parser.add_argument("--trimmed_out", type=str,
            help="Write trimmed families to file")

    args = parser.parse_args()
    
    if args.infile: linkdf = pd.read_csv(args.infile, sep="\t", header=0)
    else: linkdf = pd.read_csv(sys.stdin, sep="\t", header=0)
    linkdf.fillna("",inplace=True)
    oc = count_occurrences(linkdf)
    
    ## Create graph from data frame
    g = nx.from_pandas_dataframe(linkdf,source="Node1",target="Node2")
    g.remove_node('')
    ## Trim nodes by outgoing edges and edges by occurrence
    [gt,trimmed_nodes,trimmed_edges] = trim_graph(g,args.maxlink, args.minoc, oc)
    if args.trimmed_out: pd.DataFrame(trimmed_nodes).to_csv(args.trimmed_out,sep="\t",index=False,header=False)
    logging.info("Removed "+str(len(trimmed_edges))+" links due to low occurrence")    
    logging.info("Removed "+str(len(trimmed_nodes))+" families with too many links ("+str(len(gt.nodes()))+" remaining)")
    
    ## Create clusters for graph
    clusters = cluster(gt)
    logging.info(str(len(clusters))+" clusters created")    
    cdf = pd.DataFrame(clusters).T
    ## Sort by number of families in cluster
    cdf.sort_values("num",ascending=False,inplace=True)
    cdf.index = list(range(1,len(cdf)+1))
    
    ## Write clusters sorted by size
    write(cdf)
Example #48
def advanced_visualization(airport_df, routes_df):
	if (airport_df is None) or (routes_df is None):
		print ("Data cannot be retrieved and read")
	else:
		airport_us = airport_df[(airport_df.Country == "United States") & (airport_df.Lat > 25) 
								& (airport_df.Lat < 50) & (airport_df.Long > -130) & (airport_df.Long < -60)]
		us_airport_ix = airport_us.index.values
		routes_us = routes_df[(routes_df['Source Airport ID'].isin(us_airport_ix)) &
		                     (routes_df['Dest Airport ID'].isin(us_airport_ix))] #extract routes that fly from AND to USA
		routes_us =  pd.DataFrame(routes_us.groupby(['Source Airport', 'Dest Airport']).size().reset_index(name='counts'))
		# to find the number of flights in and out of an airport,
		# count the rows in which each airport occurs in either one of the 2 columns
		counts = routes_us['Source Airport'].append(routes_us.loc[routes_us['Source Airport'] != routes_us['Dest Airport'], 'Dest Airport']).value_counts()
		# create a data frame of position based on names in count
		counts = pd.DataFrame({'IATA': counts.index, 'total_flight': counts})
		pos_data = counts.merge(airport_us, on = 'IATA')

		# Create graph
		graph = nx.from_pandas_dataframe(routes_us, source = 'Source Airport', target = 'Dest Airport',
		                        edge_attr = 'counts',create_using = nx.DiGraph())

		# Set up base map
		plt.figure(figsize=(15,20))
		m = Basemap(
		        projection='merc',
		        llcrnrlon=-180,
		        llcrnrlat=10,
		        urcrnrlon=-50,
		        urcrnrlat=70,
		        lat_ts=0,
		        resolution='l',
		        suppress_ticks=True)

		# import long lat as m attribute
		mx, my = m(pos_data['Long'].values, pos_data['Lat'].values)
		pos = {}
		for count, elem in enumerate (pos_data['IATA']):
		    pos[elem] = (mx[count], my[count])

		# draw nodes and edges and overlay on basemap
		nx.draw_networkx_nodes(G = graph, pos = pos, nodelist = [x for x in graph.nodes() if counts['total_flight'][x] >= 100],
		                       node_color = 'r', alpha = 0.8,
		                       node_size = [counts['total_flight'][x]*4  for x in graph.nodes() if counts['total_flight'][x] >= 100])

		nx.draw_networkx_labels(G = graph, pos = pos, font_size=10,
		                        labels = {x:x for x in graph.nodes() if counts['total_flight'][x] >= 100})

		nx.draw_networkx_nodes(G = graph, pos = pos, nodelist = [x for x in graph.nodes() if counts['total_flight'][x] < 100],
		                       node_color = 'b', alpha = 0.6,
		                       node_size = [counts['total_flight'][x]*4  for x in graph.nodes() if counts['total_flight'][x] < 100])

		nx.draw_networkx_edges(G = graph, pos = pos, edge_color = 'g', width = routes_us['counts']*0.75, 
		                       alpha=0.06, arrows = False)

		m.drawcountries(linewidth = 3)
		m.drawstates(linewidth = 0.2)
		m.drawcoastlines(linewidth=1)
		m.fillcontinents(alpha = 0.3)
		line1 = mlines.Line2D(range(1), range(1), color="white", marker='o', markerfacecolor="red")
		line2 = mlines.Line2D(range(1), range(1), color="white", marker='o',markerfacecolor="blue")
		line3 = mlines.Line2D(range(1), range(1), color="green", marker='',markerfacecolor="green")
		plt.legend((line1, line2, line3), ('Large Airport > 100 routes', 'Smaller airports', 'routes'),
		           loc=4, fontsize = 'xx-large')
		plt.title("Network graph of flight routes in the USA", fontsize = 30)
		#m.bluemarble()
		plt.tight_layout()
		plt.savefig("./images/networkx_basemap/map_3.png", format = "png", dpi = 300)
		plt.show()
		print ("successful visualization")
		return 0
Example #49
        d = pd.DataFrame({'recipients': recipient,'score': score})
        d = d.sort_values(['score'], ascending=False)
        predictedPerson = d['recipients'].iloc[0]
        PredictedRecipient.append(predictedPerson)
    df['PredictedRecipient'] = PredictedRecipient
    finalDf.append(df)
finalDf = pd.concat(finalDf, axis=0)

############# Compute  Accuracy

dPerformance = compute_perfromance(finalDf)
  
## Networking aspect



G = nx.from_pandas_dataframe(dFeatures, 'sender', 'receiver')

plt.figure(figsize=(20,20))
pos = nx.spring_layout(G, k=.1)
nx.draw_networkx(G, pos, node_size=25, node_color='red', with_labels=True, edge_color='blue')
plt.show()    
Example #50
    # pathway_df_n2e = helper.convert_edges_to_node(pathway_df)

    # node_features_df = '../output/features_{}'.format(pathway_name)

    # to get which nodes are sources and targets
    pathway_nodes_df = pd.read_csv('../data/pathways/{}-nodes.txt'.
                                   format(pathway_name),
                                   delimiter='\t')
    pathway_tf_nodes = pathway_nodes_df.ix[
        pathway_nodes_df['node_symbol'] == 'tf']
    pathway_receptor_nodes = pathway_nodes_df.ix[
        pathway_nodes_df['node_symbol'] == 'receptor']

    # create the full interactome as graph
    g = nx.from_pandas_dataframe(interactome_e2n, source='#tail',
                                 target='head',
                                 create_using=nx.DiGraph())

    # add super nodes
    g.add_nodes_from(['super_tf', 'super_receptor'])

    # connect super sources
    receptor_list = list(pathway_receptor_nodes["#node"])
    add_receptor_edges = [('super_receptor', n) for n in receptor_list]
    g.add_edges_from(add_receptor_edges)

    # connect super targets
    tf_list = list(pathway_tf_nodes["#node"])
    add_tf_edges = [(n, 'super_tf') for n in tf_list]
    g.add_edges_from(add_tf_edges)
Example #51
# concatenate all these edges from all these events
# and write them to an edgelist.
# append multiple items in one go instead of this
e = e1.append(e2);
e = e.append(e3);
e.to_csv('edges.csv');

# I previously made the perp list by concatenating the perp1 perp2 and perp3 columns
# then taking unique strings
# so supposing we are starting from the point where we had perp and edge lists saved as files already
perp_list = pd.read_csv("perp_list.csv");
edges = pd.read_csv("edges.csv")
# make an empty graph
G=nx.Graph();
# feed it edges (the 'source'/'target' column names here are assumed)
G = nx.from_pandas_dataframe(edges, 'source', 'target')
# feed it nodes
G.add_nodes_from(perp_list);
# [Commenting this code so late that I forgot what this does. To be filled in later.]
test = e.replace(to_replace=perp_list, )

# quantification of categorical traits 

# recalling what we have...
print("Total rows: {0}".format(len(data)));
print(list(data));
# quantify the traits. 1 is least governmental, 3 is most. 
data = data.replace(to_replace='Private Citizens & Property', value=1);
data = data.replace(to_replace='Journalists & Media', value=1);
data = data.replace(to_replace='Educational Institution', value=1);
data = data.replace(to_replace='Abortion Related', value=1);
Example #52
0
def to_networkx_graph(data,create_using=None,multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d={0: {1: {'weight':1}}} # dict-of-dicts single edge (0,1)
    >>> G=nx.Graph(d)

    instead of the equivalent

    >>> G=nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

       Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         list of edges
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph
       Use specified graph for result.  Otherwise a new graph is created.

    multigraph_input : bool (default False)
      If True and  data is a dict_of_dicts,
      try to create a multigraph assuming dict_of_dict_of_lists.
      If data and create_using are both multigraphs then create
      a multigraph from a multigraph.

    """
    # NX graph
    if hasattr(data,"adj"):
        try:
            result= from_dict_of_dicts(data.adj,\
                    create_using=create_using,\
                    multigraph_input=data.is_multigraph())
            if hasattr(data,'graph'): # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data,'node'): # data.node should be dict-like
                result.node.update( (n,dd.copy()) for n,dd in data.node.items() )
            return result
        except:
            raise nx.NetworkXError("Input is not a correct NetworkX graph.")

    # pygraphviz  agraph
    if hasattr(data,"is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data,create_using=create_using)
        except:
            raise nx.NetworkXError("Input is not a correct pygraphviz graph.")

    # dict of dicts/lists
    if isinstance(data,dict):
        try:
            return from_dict_of_dicts(data,create_using=create_using,\
                    multigraph_input=multigraph_input)
        except:
            try:
                return from_dict_of_lists(data,create_using=create_using)
            except:
                raise TypeError("Input is not known type.")

    # list or generator of edges
    if (isinstance(data,list)
        or isinstance(data,tuple)
        or hasattr(data,'next')
        or hasattr(data, '__next__')):
        try:
            return from_edgelist(data,create_using=create_using)
        except:
            raise nx.NetworkXError("Input is not a valid edge list")

    # Pandas DataFrame
    try:
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            try:
                # from_pandas_dataframe needs explicit source and target
                # columns; assume the first two columns hold the endpoints
                return nx.from_pandas_dataframe(data, data.columns[0],
                                                data.columns[1],
                                                create_using=create_using)
            except:
                msg = "Input is not a correct Pandas DataFrame."
                raise nx.NetworkXError(msg)
    except ImportError:
        msg = 'pandas not found, skipping conversion test.'
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy
        if isinstance(data,numpy.matrix) or \
               isinstance(data,numpy.ndarray):
            try:
                return nx.from_numpy_matrix(data,create_using=create_using)
            except:
                raise nx.NetworkXError(\
                  "Input is not a correct numpy matrix or array.")
    except ImportError:
        warnings.warn('numpy not found, skipping conversion test.',
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy
        if hasattr(data,"format"):
            try:
                return nx.from_scipy_sparse_matrix(data,create_using=create_using)
            except:
                raise nx.NetworkXError(\
                      "Input is not a correct scipy sparse matrix type.")
    except ImportError:
        warnings.warn('scipy not found, skipping conversion test.',
                      ImportWarning)


    raise nx.NetworkXError(\
          "Input is not a known data type for conversion.")

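# Usage sketch (not in the original): nx.Graph(...) routes each of these input
# types through to_networkx_graph, so the following are equivalent entry points.
import networkx as nx
G1 = nx.Graph({0: {1: {'weight': 1}}})   # dict-of-dicts, one weighted edge
G2 = nx.Graph([(0, 1), (1, 2)])          # list of edges
G3 = nx.Graph(G2)                        # copy of an existing NetworkX graph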
Example #53
0
import pandas as pd
import networkx as nx
import numpy as np
import sys

if sys.version_info.major < 3: sys.exit("Python 3.x or above required")
if tuple(int(x) for x in nx.__version__.split('.')[:2]) < (1, 11):
    sys.exit("Networkx 1.11 or above required")

# Read the edge list and convert it to a network
edges = pd.read_csv("all_edges.csv")
F = nx.from_pandas_dataframe(edges, 'node_1', 'node_2')

# Read node lists
officers = pd.read_csv("Officers.csv", low_memory=False).set_index('node_id')
intermediaries = pd.read_csv("Intermediaries.csv").set_index('node_id')
addresses = pd.read_csv("Addresses.csv", low_memory=False).set_index('node_id')
entities = pd.read_csv("Entities.csv", low_memory=False).set_index('node_id')

# Combine the node lists into one dataframe
officers["type"] = "officer"
intermediaries["type"] = "intermediary"
addresses["type"] = "address"
entities["type"] = "entity"

all_nodes = pd.concat([officers, intermediaries, addresses, entities])

# Do some cleanup of names
all_nodes['name'] = all_nodes['name'].str.upper()
all_nodes['name'] = all_nodes['name'].str.strip()

# Ensure that all "Bearers" do not become a single node
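# Hypothetical continuation (the original is truncated here): one way to keep
# generic "bearer" entries from sharing a name, and being collapsed by any
# later name-based merge, is to blank their placeholder names (np is
# imported at the top of this script).
bearer_like = ['THE BEARER', 'BEARER', 'EL PORTADOR', 'AL PORTADOR']
all_nodes['name'] = all_nodes['name'].replace(to_replace=bearer_like, value=np.nan)
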
Example #54
0
        # add plot.ly plotting options
        traces.append(make_scatter(lon_cc,lat_cc))
     
    return traces

def get_coastline_traces():
    poly_paths = m.drawcoastlines().get_paths() # coastline polygon paths
    N_poly = 91  # use only the 91 biggest coastlines (i.e. no rivers)
    return polygons_to_traces(poly_paths, N_poly)

traces_cc = get_coastline_traces()


############################## FRAME ABOVE FOR MAP, FRAME BELOW FOR NODES

G=nx.from_pandas_dataframe(latLonPopulated_RT, source='tweetId',target='retweeter')

pos = {}
color = {}
for i, tweet in latLonPopulated_RT.iterrows():
    # key positions and colors by the raw ids so they match the node labels
    # that from_pandas_dataframe created above
    pos[tweet['tweetId']] = np.asarray([tweet['x'], tweet['y']])
    pos[tweet['retweeter']] = np.asarray([tweet['x'], tweet['y']])
    color[tweet['tweetId']] = tweet['node_color']
    color[tweet['retweeter']] = tweet['node_color']

for n, p in pos.items():
    G.node[n]['pos'] = p

for n, c in color.items():
    G.node[n]['color'] = c
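
# A sketch (not from the original) of turning the stored positions into a
# plotly-style edge trace: one list per axis with None gaps between edges.
edge_x, edge_y = [], []
for u, v in G.edges():
    x0, y0 = G.node[u]['pos']
    x1, y1 = G.node[v]['pos']
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]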
    
Example #55
0
def MedianDegree(d):
    G = nx.from_pandas_dataframe(d, 'actor', 'target', ['created_time'])
    degrees = sorted(G.degree().values())
    return np.median(degrees)
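
# Usage sketch with hypothetical event data; MedianDegree itself assumes
# networkx as nx and numpy as np are already imported.
import pandas as pd
events = pd.DataFrame({'actor': ['a', 'a', 'b'],
                       'target': ['b', 'c', 'c'],
                       'created_time': ['t1', 't2', 't3']})
print(MedianDegree(events))   # edges a-b, a-c, b-c: every degree is 2, so 2.0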
Example #56
0
import pandas as pd
import MySQLdb as mdb
from a_Model import ModelIt
from events import returnTopEvents
from geocode import latlon
from rankingSim import artistPath
import networkx as nx
import unicodedata
import HTMLParser
from flask import render_template
from sqlalchemy import create_engine
from secrets import username, host, dbname, pswd

engine = create_engine('mysql://%s:%s@localhost/%s'%(username,pswd,dbname))
con = mdb.connect('localhost', username, pswd, dbname)
# reuse the SQLAlchemy engine for the pandas read
simArtists = pd.read_sql_table('relArtistFull', con=engine)
g = nx.from_pandas_dataframe(simArtists,'Artist','RelArtist')



#@app.route('/')
#@app.route('/index/')
#def index():
#	user = { 'nickname': 'Dan' } # fake user
#	return render_template("index.html",
#		title = 'Home',
#		user = user)

@app.errorhandler(500)
def page_not_found(e):
    return render_template('error.html'), 500
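
# A sketch (not part of the original app) of what the relArtistFull graph g
# supports: a chain of related artists linking two names from the same table.
def related_chain(artist_a, artist_b):
    if g.has_node(artist_a) and g.has_node(artist_b) and nx.has_path(g, artist_a, artist_b):
        return nx.shortest_path(g, artist_a, artist_b)
    return []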
Example #57
0
    
    dataTime = dataTime0.set_index("vID")
    #index_dataTime = dataTime.index.values
    #print dataTime
    
    perm = list(permutations(list_vIDs,2))
    #print perm
    dist = [((((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX']))**2) + 
            (((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY']))**2))**0.5 for p in perm]
    dataDist = pd.DataFrame(dist, index=perm, columns=['dist'])
    

    #Create the fields vID and To
    dataDist['FromTo'] = dataDist.index
    dataDist['From'] = dataDist.FromTo.str[0]
    dataDist['To'] = dataDist.FromTo.str[1]
    #I multiply by 100 in order to scale the number
    dataDist['weight'] = (1/dataDist.dist)*100
    
    #Delete the intermediate FromTo field
    dataDist = dataDist.drop('FromTo', axis=1)
    

    
    graph = nx.from_pandas_dataframe(dataDist, 'From','To',['weight'])
 

    save_graph(graph,'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\graphs\\000_my_graph+%i.png' %time)
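
    # Worked mini-example of the weighting above (values hypothetical):
    # vehicles at gX/gY of (0,0) and (3,4) are dist = 5.0 apart, so their
    # edge weight is (1/5.0)*100 = 20.0; nearer vehicle pairs weigh more.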

    
Example #58
0
# -*- coding: utf-8 -*-
#%% read in data - use a pandas data frame just for convenience
import pandas as pd
data = pd.read_table("./data/HW1_4.txt",
                     sep = " ",
                     header = None, 
                     names = ['vx', 'vy', 'weight'])

# %% networkx section
import networkx as nx

# use networkx to create graph object
graph = nx.from_pandas_dataframe(data, 
                                 source = "vx", 
                                 target = "vy", 
                                 edge_attr = "weight")

# get the dijkstra shortest path
path_dijkstra = nx.all_pairs_dijkstra_path(graph)
path_1_6_dijkstra = path_dijkstra[1][6]

# get the all-pairs shortest path
path_all_pairs = nx.all_pairs_shortest_path(graph)
path_1_6_all_pairs =  path_all_pairs[1][6]
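
# Note (added): the two results can differ, since Dijkstra minimizes total
# edge weight while all_pairs_shortest_path counts hops. A minimal sketch:
H = nx.Graph()
H.add_weighted_edges_from([(1, 2, 10), (2, 6, 10), (1, 6, 100)])
print(nx.dijkstra_path(H, 1, 6))   # [1, 2, 6], total weight 20
print(nx.shortest_path(H, 1, 6))   # [1, 6], fewest hops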

Example #59
0
def create_graph(dataframe, filename):
    graph = nx.from_pandas_dataframe(dataframe, 'x', 'y', 'weight')
    add_attribute(graph)
    nx.write_graphml(graph, filename+'.graphml')
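
# Usage sketch with a hypothetical weighted edge list; assumes pandas is
# imported as pd and add_attribute is defined elsewhere, as the function needs.
df = pd.DataFrame({'x': [1, 2], 'y': [2, 3], 'weight': [0.5, 1.5]})
create_graph(df, 'toy_graph')   # writes toy_graph.graphml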