def to_networkx_map(dataset, interactor_a, interactor_b, selfloop='n'):
    """ Create an undirected graph from a dataset using the NetworkX package

    Args:
        dataset (:obj:`pandas.DataFrame`): the DataFrame with a header and at least two columns
        interactor_a (:obj:`str`): column for interactor A
        interactor_b (:obj:`str`): column for interactor B
        selfloop (:obj:`str`, optional): create the graph with ('y') or without ('n') self-loops

    Returns:
        :obj:`networkx.Graph`: the undirected graph built from the dataset

    Raises:
        ValueError: if the column(s) is not found in the dataset
        ValueError: if selfloop is not 'y' or 'n'
    """
    if (interactor_a not in dataset) or (interactor_b not in dataset):
        raise ValueError('the column(s) is not found in the dataset')
    elif selfloop not in ['y', 'n']:
        raise ValueError("the selfloop should be 'y' or 'n'")
    else:
        nx_map = nx.from_pandas_dataframe(dataset, interactor_a, interactor_b)
        if selfloop == 'n':
            nx_map.remove_edges_from(nx_map.selfloop_edges())
        print("Retrieved the protein-protein interaction network with {} nodes".format(Parameter.num_of_node(nx_map)),
              "and {} edges.".format(Parameter.num_of_edge(nx_map)))
        return nx_map
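# --- Usage sketch for to_networkx_map (added for illustration, not from the
# original source). Assumes networkx < 2.0, where nx.from_pandas_dataframe
# exists, and that the external Parameter helper used above is in scope;
# the interaction table below is invented.
import pandas as pd

ppi = pd.DataFrame({'protein_a': ['P1', 'P2', 'P3', 'P3'],
                    'protein_b': ['P2', 'P3', 'P1', 'P3']})  # last row is a self-loop
ppi_graph = to_networkx_map(ppi, 'protein_a', 'protein_b', selfloop='n')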
def readDGFrameFile(filename, interRRI_norRNA=1, support_read=3): fn_stat_dict = nested_dict() inter, intra = 0, 0 with open(filename, 'r') as TXT: for line in TXT: line = line.strip() if not line or line.startswith('#'): continue arr = line.split('\t') if arr[1] == arr[5]: intra += 1 else: inter += 1 fn_stat_dict['inter'] = inter fn_stat_dict['intra'] = intra fn_stat_dict['all'] = intra + inter df = pd.read_csv(filename, header=0, sep='\t') df['type'] = ['intra' if i == j else 'inter' for i,j in zip(df['lchr'], df['rchr'])] df_inter_RRI = df[df['type']=='inter'] nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr') fn_stat_dict['uniq RRI'] = len(nx_inter_RRI.edges()) if interRRI_norRNA: df_inter_RRI = df_inter_RRI[(df_inter_RRI['ltype'].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI['rtype'].isin(['mRNA', 'lncRNA']))] df_inter_RRI = df_inter_RRI[df_inter_RRI['support']>=support_read] nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr') nx_inter_RRI_info_dict, G_largest = RRI_network_property2(nx_inter_RRI) for i,j in nx_inter_RRI_info_dict.items(): fn_stat_dict[i] = j # fn_stat_df['sampling'] = '' fn_stat_df = pd.DataFrame(fn_stat_dict, index=[0]) return fn_stat_df
def compute_net_stats_on_read_hrg_pickle(orig_df, gn, metricx):
    with open(r"Results/{}_hstars.pickle".format(gn), "rb") as in_file:
        c = cPickle.load(in_file)
    print " ==> pickle file loaded"
    if isinstance(c, dict):
        if len(c.keys()) == 1:
            c = c.values()[0]  # we have k nx graph objects
        else:
            print c.keys()
    if len(orig_df.columns) >= 3:
        orig = nx.from_pandas_dataframe(orig_df, 'src', 'trg', edge_attr=['ts'])
    else:
        orig = nx.from_pandas_dataframe(orig_df, 'src', 'trg')
    # metrics.network_properties([orig], metricx, c, name=gn, out_tsv=False)
    ## --
    p = mp.Pool(processes=10)
    for j, gnx in enumerate(c):
        if isinstance(gnx, list):
            gnx = gnx[0]
        p.apply_async(metrics.network_properties,
                      args=([orig], ['clust'], gnx, gn, True, ),
                      callback=collect_results)
    p.close()
    p.join()
    print(results)
def graph_from_edgedata(edgedata, attr='Weight', directed=True, connected_component=False):
    '''
    :param edgedata: edge data
    :param attr: string or list; edge attribute data; if there are no weights, set attr=None
    :param directed: whether to build a directed or an undirected graph
    :param connected_component: return only the largest connected subgraph (defaults to False);
           for directed graphs the weakly connected variant is used (still experimental)
    :return: networkx.Graph or DiGraph
    '''
    if len(edgedata) < 1:
        if directed:
            return nx.DiGraph()
        else:
            return nx.Graph()

    if directed:
        graph = nx.from_pandas_dataframe(edgedata, 'Source', 'Target',
                                         edge_attr=attr, create_using=nx.DiGraph())
        if connected_component:
            # keep only the largest weakly connected subgraph
            graph = max(nx.weakly_connected_component_subgraphs(graph), key=len)
    else:
        graph = nx.from_pandas_dataframe(edgedata, 'Source', 'Target',
                                         edge_attr=attr, create_using=nx.Graph())
        if connected_component:
            graph = max(nx.connected_component_subgraphs(graph), key=len)

    print('Directed Graph :', graph.is_directed())
    return graph
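# --- Minimal call sketch for graph_from_edgedata (invented edge list; assumes
# networkx 1.x, which provides from_pandas_dataframe and
# weakly_connected_component_subgraphs):
import pandas as pd

edges = pd.DataFrame({'Source': ['a', 'b', 'c'],
                      'Target': ['b', 'c', 'a'],
                      'Weight': [1.0, 2.0, 0.5]})
digraph = graph_from_edgedata(edges, attr='Weight', directed=True,
                              connected_component=True)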
def get_prod_rules(data_frame, nbr_blocks): df = data_frame nb = int(nbr_blocks) chunked_graphs_lst = [] if nb: slice = int((df.ts.max() - df.ts.min()) / nb) WG = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts']) # whole graph pos = nx.spring_layout(WG) for blk in range(df.ts.min(), df.ts.max(), slice): mask = (df['ts'] >= blk) & (df['ts'] <= blk + slice) ldf = df.loc[mask] G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts']) chunked_graphs_lst.append(G) prules = derive_prules_from(chunked_graphs_lst) df = pd.DataFrame(columns=['rid', 'lhs', 'rhs', 'p']) for k, r in enumerate(prules): #print "{}: {}".format(k, [x for x in r if 'S' in x])# [len(x) for x in lhs if 'S' in x]) # df = pd.concat ([df, pd.DataFrame([x for x in r], columns=['rid','lhs', 'rhs','p'])]) bdf = pd.DataFrame([x for x in r], columns=['rid', 'lhs', 'rhs', 'p']) bdf['lcnt'] = bdf['lhs'].apply(lambda x: len(x)) bdf['rcnt'] = bdf['rhs'].apply(lambda x: len(x)) df = pd.concat([df, bdf]) break print df.head() # print 'size of the rhs'[len(x) for x in df[df['lhs']=='S']['rhs']] # tdf = df[['lhs','rhs']].apply(lambda x: [len(r) for r in x]) # tdf.columns=['lcnt','rcnt'] # df =pd.concat([df,tdf],axis=1) # print df[['lcnt','rcnt']].describe() # # df.boxplot(['lcnt','rcnt']) # df.boxplot(by=['lhs','rhs'], notch=True) # # ax.set_xticks(range(10)) df.plot.hist() plt.savefig('/tmp/outfig', bbox_inches='tight') exit() ptsg.plot_timestamped_graphs(chunked_graphs_lst, pos=pos, outfigname="tmp1") chunked_graphs_lst = [] for blk in range(df.ts.min(), df.ts.max(), slice): mask = (df['ts'] <= blk + slice) ldf = df.loc[mask] G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts']) chunked_graphs_lst.append(G) # plot ptsg.plot_timestamped_graphs(chunked_graphs_lst, pos=pos, outfigname="tmp2") if 0: print for k, pr in enumerate(prules): ## print enum rules print "{}\t{}".format(k, pr)
def get_hrg_production_rules(edgelist_data_frame, graph_name): from growing import derive_prules_from df = edgelist_data_frame try: G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts']) # whole graph except Exception, e: print '==========================\n\t', print str(e) traceback.print_exc() G = nx.from_pandas_dataframe(df, 'src', 'trg')
def __init__(self, check_ins, friends, self_check=True): self.uid = check_ins['uid'].unique() self.locid = check_ins['locid'].unique() self.ul_graph = nx.from_pandas_dataframe(check_ins, 'uid', 'locid', ['weight']) self.uu_graph = nx.from_pandas_dataframe(friends, 'u1', 'u2') if self_check: uid_cmp = set(friends['u1'].unique()) | set(friends['u2'].unique()) assert uid_cmp.issubset(set(self.uid)) logging.debug('self check pass %d/%d' % (len(uid_cmp), len(set(self.uid))))
def read_dir(dir='/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-5-rep-combine/downsampling_N', to_dgframe=0, get_inter_intra=1, read_nx=1, interRRI_norRNA=1, support_read=3): fn_ls = os.listdir(dir) # print fn_ls fn_stat_dict = nested_dict() downsampling_N_draw = dir + '.subnetwork.draw.pdf' fig,ax=plt.subplots(10,1) for n,fn in enumerate(fn_ls): print "process: %s"%(fn) dfFile = dir + '/' + fn + '/' + '27-DG' frameFile = dfFile + '.txt' if to_dgframe: paris_dg2frame.DG2Frame(dfFile=dfFile, frameFile=frameFile) if get_inter_intra: inter, intra = 0, 0 with open(frameFile, 'r') as TXT: for line in TXT: line = line.strip() if not line or line.startswith('#'): continue arr = line.split('\t') if arr[1] == arr[5]: intra += 1 else: inter += 1 fn_stat_dict[fn]['inter'] = inter fn_stat_dict[fn]['intra'] = intra fn_stat_dict[fn]['all'] = intra + inter if read_nx: df = pd.read_csv(frameFile, header=0, sep='\t') df['type'] = ['intra' if i == j else 'inter' for i,j in zip(df['lchr'], df['rchr'])] df_inter_RRI = df[df['type']=='inter'] nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr') fn_stat_dict[fn]['uniq RRI'] = len(nx_inter_RRI.edges()) if interRRI_norRNA: df_inter_RRI = df_inter_RRI[(df_inter_RRI['ltype'].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI['rtype'].isin(['mRNA', 'lncRNA']))] df_inter_RRI = df_inter_RRI[df_inter_RRI['support']>=support_read] nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr') nx_inter_RRI_info_dict, G_largest = RRI_network_property2(nx_inter_RRI) for i,j in nx_inter_RRI_info_dict.items(): fn_stat_dict[fn][i] = j # fn_stat_dict[fn]['uniq RRI'] = len(nx_inter_RRI.edges()) if n < 10: draw_graph(G_largest, ax=ax[n]) plt.savefig(downsampling_N_draw) savefn = dir + '.stat.txt' fn_stat_df = pd.DataFrame.from_dict(fn_stat_dict) fn_stat_df = fn_stat_df.T fn_stat_df['sampling'] = fn_stat_df.index print fn_stat_df.head() fn_stat_df.to_csv(savefn, header=True, index=False, sep='\t') return fn_stat_df
def set_attributes(dataframe, attribute_dataframe, graph_type='dir'): """ Returns a network with attributes assigned to each node Input parameters: 1. dataframe - edge list 2. attribute_dataframe - contains node id, and attributes (name, parrty, nationality, occupation, gender) 3. graph_type - 'dir' or 'undir' """ # load dataframe as graph if graph_type == 'dir': G = nx.from_pandas_dataframe(dataframe, 'from', 'to', edge_attr=False, create_using=nx.DiGraph()) elif graph_type == 'undir': G = nx.from_pandas_dataframe(dataframe, 'from', 'to', edge_attr=False, create_using=nx.Graph()) # G = nx.from_pandas_dataframe(dataframe,'from','to') # get list of nodes node_list = G.nodes() # create dictionaries data = attribute_dataframe[attribute_dataframe["ID"].isin(node_list)] name_data = data[["ID", "name"]].set_index('ID')['name'].to_dict() gender_data = data[["ID", "gender"]].set_index('ID')['gender'].to_dict() occupation_data = data[["ID", "occupation" ]].set_index('ID')['occupation'].to_dict() nationality_data = data[["ID", "nationality" ]].set_index('ID')['nationality'].to_dict() party_data = data[["ID", "party"]].set_index('ID')['party'].to_dict() birth = data[["ID", "birthDate"]].set_index('ID')['birthDate'].to_dict() death = data[["ID", "deathDate"]].set_index('ID')['deathDate'].to_dict() # set attributes nx.set_node_attributes(G, 'gender', gender_data) nx.set_node_attributes(G, 'name', name_data) nx.set_node_attributes(G, 'occupation', occupation_data) nx.set_node_attributes(G, 'nationality', nationality_data) nx.set_node_attributes(G, 'party', party_data) nx.set_node_attributes(G, 'birthDate', birth) nx.set_node_attributes(G, 'deathDate', death) #print stuff num_n = len(G.nodes()) num_e = len(G.edges()) print("Number of nodes: ", num_n) print("Number of edges: ", num_e) return G, num_n, num_e
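# --- set_attributes usage sketch (both frames invented; attribute_dataframe
# must carry every column referenced above; assumes networkx 1.x, whose
# set_node_attributes signature is (G, name, values)):
import pandas as pd

edge_df = pd.DataFrame({'from': [1, 2], 'to': [2, 3]})
attr_df = pd.DataFrame({'ID': [1, 2, 3],
                        'name': ['a', 'b', 'c'],
                        'gender': ['f', 'm', 'f'],
                        'occupation': ['x', 'y', 'z'],
                        'nationality': ['US', 'UK', 'FR'],
                        'party': ['p1', 'p2', 'p1'],
                        'birthDate': ['1950', '1960', '1970'],
                        'deathDate': [None, None, None]})
G, num_n, num_e = set_attributes(edge_df, attr_df, graph_type='dir')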
def test_from_dataframe_all_attr(self, ): Gtrue = nx.Graph([('E', 'C', {'cost': 9, 'weight': 10}), ('B', 'A', {'cost': 1, 'weight': 7}), ('A', 'D', {'cost': 7, 'weight': 4})]) G = nx.from_pandas_dataframe(self.df, 0, 'b', True) self.assert_equal(G, Gtrue) # MultiGraph MGtrue = nx.MultiGraph(Gtrue) MGtrue.add_edge('A', 'D', cost=16, weight=4) MG = nx.from_pandas_dataframe(self.mdf, 0, 'b', True, nx.MultiGraph()) self.assert_equal(MG, MGtrue)
def Hstar_Graphs_Ignore_Time(df, graph_name, tslices, axs): if len(df.columns) == 3: G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr='ts') else: G = nx.from_pandas_dataframe(df, 'src', 'trg') # force to unrepeated edgesA if 0: print nx.info(G) G = G.to_undirected() if 0: print nx.info(G) exit() # Derive the prod rules in a naive way, where prod_rules = PHRG.probabilistic_hrg_learning(G) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) num_nodes = G.number_of_nodes() print "Starting max size" g.set_max_size(num_nodes) print "Done with max size" Hstars = [] num_samples = 20 print '*' * 40 for i in range(0, num_samples): rule_list = g.sample(num_nodes) hstar = PHRG.grow(rule_list, g)[0] Hstars.append(hstar) # if 0: # g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) # draw_degree_whole_graph(g,axs) # draw_degree(Hstars, axs=axs, col='r') # #axs.set_title('Rules derived by ignoring time') # axs.set_ylabel('Frequency') # axs.set_xlabel('degree') if 1: # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd'] metricx = ['eigen'] g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) # graph_name = os.path.basename(f_path).rstrip('.tel') print ">", graph_name metrics.network_properties([g], metricx, Hstars, name=graph_name, out_tsv=True)
def read_graph(filename, directed=True, sep=' ', header = None): """ Create networkx graph using pandas. :param filename: every line (u, v) :param directed: boolean :param sep: separator in file :return """ df = pd.read_csv(filename, sep=sep, header = header) if directed: G = nx.from_pandas_dataframe(df, 0, 1, create_using=nx.DiGraph()) else: G = nx.from_pandas_dataframe(df, 0, 1) return G
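# --- read_graph usage sketch; the edge file is written inline so the example
# is self-contained (the path is arbitrary):
with open('/tmp/toy_edges.txt', 'w') as fh:
    fh.write('0 1\n1 2\n2 0\n')
G = read_graph('/tmp/toy_edges.txt', directed=False)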
def net_info(edgelist_fname): dfs = Pandas_DataFrame_From_Edgelist([edgelist_fname]) df = dfs[0] try: g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) except Exception: g = nx.from_pandas_dataframe(df, 'src', 'trg') if df.empty: g = nx.read_edgelist(edgelist_fname, comments="%") gn = graph_name(edgelist_fname) return (gn, g.number_of_nodes(), g.number_of_edges())
def test_from_dataframe_all_attr(self,): Gtrue = nx.Graph( [ ("E", "C", {"cost": 9, "weight": 10}), ("B", "A", {"cost": 1, "weight": 7}), ("A", "D", {"cost": 7, "weight": 4}), ] ) G = nx.from_pandas_dataframe(self.df, 0, "b", True) self.assert_equal(G, Gtrue) # MultiGraph MGtrue = nx.MultiGraph(Gtrue) MGtrue.add_edge("A", "D", cost=16, weight=4) MG = nx.from_pandas_dataframe(self.mdf, 0, "b", True, nx.MultiGraph()) self.assert_equal(MG, MGtrue)
def read_graph_OLD(filename, directed=True, sep=' ', header=None): """ Create networkx graph using pandas. :param filename: every line (u, v) :param directed: boolean :param sep: separator in file :return """ df = pd.read_csv(filename, sep=sep, header=header) if directed: G = nx.from_pandas_dataframe(df, 0, 1, create_using=nx.DiGraph()) else: G = nx.from_pandas_dataframe(df, 0, 1) print('Read graph') return G
def graph_from_pathway(pathway): edge_list = pathway[['#tail', 'head']] g = nx.from_pandas_dataframe(edge_list, '#tail', 'head', create_using=nx.DiGraph()) return (g)
def make_graph(edgesdf, edge_attr='fdist', name=None): g = nx.from_pandas_dataframe(edgesdf, source='node1', target='node2', edge_attr=edge_attr) if name: g.add_node('namenode', name=name) return g
def Structure_Varying_Overtime(df, hrBlck, axs): # import datetime # red_patch = mpatches.Patch(color='red', label='uniq nodes') # blu_patch = mpatches.Patch(color='blue', label='edges') print '{} hr'.format(hrBlck) dat = {} clqs = {} agg_hrs = 0 for s in range(df['ts'].min(), df['ts'].max(), int(3600 * hrBlck)): mask = (df['ts'] >= s) & (df['ts'] < s + 3600 * hrBlck) tdf = df.loc[mask] agg_hrs += hrBlck SG = nx.from_pandas_dataframe(tdf, 'src', 'trg', ['ts']) dat[agg_hrs] = np.mean(SG.degree().values()) cliq = nx.find_cliques(SG) clqs[agg_hrs] = np.mean([len(c) for c in cliq]) xvals = sorted(dat.keys()) #print [datetime.datetime.fromtimestamp(d).strftime("%d/%m") for d in xvals] yvals = [dat[x] for x in xvals] axs.plot(xvals, yvals, '.', linestyle="-", label='Avg degree') # Save to disk the need files with open("Results/avg_degree_structure_in_{}hrs.tsv".format(hrBlck), 'w') as f: for k in range(0, len(yvals)): f.write("({},{})\n".format(xvals[k], yvals[k])) yvals = [clqs[x] for x in xvals] axs.plot(xvals, yvals, '.', linestyle="-", label="Avg clique size") axs.set_xlabel('hours') # Save to disk the need files with open("Results/avg_cliq_size_structure_in_{}hrs.tsv".format(hrBlck), 'w') as f: for k in range(0, len(yvals)): f.write("({},{})\n".format(xvals[k], yvals[k])) return
def simm2net(simm):
    stacked = simm.stack()
    sliced = stacked[stacked >= 0.7]
    net = nx.from_pandas_dataframe(sliced.reset_index(), "level_0", "level_1")
    net.add_nodes_from(simm.columns)  # keep isolated nodes from the similarity matrix
    return net
def generate_network_plot_univ(network_data, label):
    """
    :param network_data: edge list with 'from' and 'to' columns
    :param label: whether to draw node labels
    :return:
    """
    # Create a new column with the number of publications by author
    network_data['count'] = ''
    n_publications = network_data['from'].value_counts()

    # Set the number of publications for each author
    for name in n_publications.index:
        network_data.loc[(network_data['from'] == name), 'count'] = int(
            n_publications[n_publications.index == name].values)

    # Build your graph
    G = nx.from_pandas_dataframe(network_data, 'from', 'to')

    # labels = {}
    #
    # for node in G.nodes():
    #     if node in list(network_data['label'].dropna().values):
    #         labels[node] = node

    # Plot it
    nx.draw(G, with_labels=label,
            node_size=list(network_data['count'].values * 10))

    plt.show()
def _individual_measures(self):
    print("Running NX analysis for {0} and timeframe length {1}".format(self._owner, conf.a_length_timeframe))
    res_louvain = {}
    res_degree_centrality = {}
    res_betweenness_centrality = {}
    res_eigenvector_centrality = {}
    res_modularity = {}
    time_start = time.time()
    for dt in rrule.rrule(rrule.WEEKLY, dtstart=self._startdt, until=self._enddt):
        lap_time = time.time()
        links = self._controller.get_communication_subgraph(self._owner, dt)
        if not links.empty:
            multi_graph = nx.from_pandas_dataframe(links, source="source", target="target", create_using=nx.MultiGraph())
            if conf.a_louvain:
                partition = nxlouvain.best_partition(multi_graph)
                res_louvain[dt.strftime("%Y-%m-%d")] = partition
            if conf.a_betweenness_centrality:
                bc = nx.betweenness_centrality(multi_graph, normalized=True)
                res_betweenness_centrality[dt.strftime("%Y-%m-%d")] = bc
            if conf.a_degree_centrality:
                dc = nx.degree_centrality(multi_graph)
                res_degree_centrality[dt.strftime("%Y-%m-%d")] = dc
            if conf.a_modularity:
                # note: relies on partition computed above, so conf.a_louvain must also be enabled
                mod = nxlouvain.modularity(partition, multi_graph)
                res_modularity[dt.strftime("%Y-%m-%d")] = mod
            if conf.a_eigenvector_centrality:
                pass
                # simple_graph = self.convert_to_simple(multi_graph)
                # TODO: eigenvector centrality calculation fails occasionally
                # reason may be that nx.eigenvector_centrality() can't handle star graphs
                # https://stackoverflow.com/questions/43208737/using-networkx-to-calculate-eigenvector-centrality
                # ec = nx.eigenvector_centrality(simple_graph)
                # res_eigenvector_centrality[dt.strftime("%Y-%m-%d")] = ec
        if conf.output_verbose:
            print("current: {0} - time: {1:.2f}s".format(dt.date(), time.time() - lap_time))
    print("{0:.2f}s".format(time.time() - time_start))
    print()
    self._modularity = res_modularity
    self._degree_centrality = res_degree_centrality
    self._betweenness_centrality = res_betweenness_centrality
    self._eigenvector_centrality = res_eigenvector_centrality
    self._partition = res_louvain
def createGraph(fileUrl): df = pd.read_csv(fileUrl, sep=',') #,error_bad_lines=False G = nx.from_pandas_dataframe(df, source='Source', target='Target', edge_attr='weight') return G
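# --- createGraph usage sketch (Python 3); pd.read_csv accepts a file-like
# object, so an in-memory CSV (invented here) stands in for a real file path:
import io

csv_text = "Source,Target,weight\na,b,1\nb,c,2\n"
G = createGraph(io.StringIO(csv_text))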
def build_org_networks(self): """ Build graphs for connected users where applicable Note degree and children will have high correlation """ self.graphs = {} for group, df in self.groups['org_id']: graph_df = df[['invited_by_user_id', 'object_id' ]][df.invited_by_user_id.notnull()].astype(int) if graph_df.shape[0] > 1: graph = nx.from_pandas_dataframe(graph_df.astype(int), 'invited_by_user_id', 'object_id') self.graphs[group] = graph degrees = np.array(graph.degree().items()).astype(int) self.dfs['users']['degree'].loc[degrees[:, 0]] = degrees[:, 1] for g in nx.connected_component_subgraphs(graph): nodes = g.nodes() lenNodes = len(nodes) if lenNodes > 3: self.dfs['users']['local_rank'].loc[nodes] = len(nodes) self.dfs['users']['children'] = self.dfs['users']['degree'] -\ self.dfs['users']['invited_by_user_id'].notnull()
def read_load_graph(fname): df = Pandas_DataFrame_From_Edgelist([fname])[0] G = nx.from_pandas_dataframe(df, source='src', target='trg') Gc = max(nx.connected_component_subgraphs(G), key=len) gname = graph_name(fname) Gc.name = gname return Gc
def MakeLFN(flow_years, nrows=None, manual_filepath=None, print_info=True):
    '''
    Make Labour Flow Network where one job change is sufficient for a link.

    Args:
        - flow_years: years of flows data, used to locate the input file
          (text file with col format: firm1_ID, firm2_ID, number)
        - nrows: option to restrict the number of rows of flows read in.
            this allows for the creation of a smaller graph for testing.
        - manual_filepath: optional explicit path to the flows file
    Returns:
        - networkx graph
    '''
    #note that pandas deals with the header automatically so there
    # is no need to do skiprows=1
    df = MakeFlowsDF(flow_years, manual_filepath=manual_filepath, nrows=nrows)
    g = nx.from_pandas_dataframe(
            df,
            source='from_firm', target='to_firm',
            edge_attr=None #take care with edge_attr='Number'
            # it will record only the second instance of the
            # edge (ie it will only see one flow direction)
            )
    #set graph name to be of the form 'Flows: year1-year2'
    graph_name = 'LFN with flows: ' + flow_years + '.'
    if nrows != None:
        graph_name = ' '.join([graph_name,
            'Warning: Only used first', str(nrows),
            'of flows from each year\'s csv file (eg, if 1996-1997 then nrows will',
            'have been used, but if 1996-1998 then 3*nrows will have been used).'])
    g.name = graph_name
    if print_info==True:
        print(nx.info(g))
    return g
def load_node_data(df_adj_list, df_annotations, df_unlabelled): network = from_pandas_dataframe(df_adj_list, source="User_ID_x", target="User_ID_y", edge_attr=["Weights"]) for label in df_annotations.columns.values: set_node_attributes(network, values=pd.Series( df_annotations[label], index=df_annotations.index).to_dict(), name=label) labelled_node_train, stance_train = map( np.array, zip(*[([node], value["Stance"] == "Favour") for node, value in network.nodes(data=True) if value["Stance"] in ["Favour", "Against"]])) unlabelled_node = map( np.array, zip(*[([node]) for node, value in network.nodes(data=True) if value["Stance"] == "NONE"])) return dataset(labelled_node_train, stance_train, unlabelled_node, network)
def plot_nbr_prod_rules_per_ts(pddf, axs, kthSlice, nSlices): span = (pddf['ts'].max() - pddf['ts'].min()) / nSlices mask = (pddf['ts'] >= pddf['ts'].min() + span * kthSlice) & ( pddf['ts'] < pddf['ts'].min() + span * (kthSlice + 1)) print pddf.shape pddf = pddf.loc[mask] print pddf.shape # sg = nx.from_pandas_dataframe(pddf, 'src', 'trg', ['ts']) # if 0: print nx.info(sg) # cliq=nx.find_cliques(sg) # print sorted((len(c) for c in cliq)) gb = pddf.groupby(['ts']).groups ts_cliq_cnt = {} for k in gb.keys(): df = pddf.loc[gb[k]] sg = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts']) cliq = nx.find_cliques(sg) ts_cliq_cnt[k] = [len(c) for c in cliq] df = pd.DataFrame.from_dict(ts_cliq_cnt.items(), dtype=np.int64) df['av'] = df[1].apply(lambda x: np.mean(x)) df.sort_values(by=[0], inplace=True) # print df.head() # df['av'].plot(x=[0],ax=axs,color='b',alpha=0.75) axs.plot(df[0].values, df['av'].values, 'b', alpha=0.6) axs.set_xlabel('epochs') axs.set_ylabel('Avg Clique Length') return # [_]
def draw_weighted_graph(identifier, species=None, limit=100): result = creat_model_input(identifier, species, limit) G = nx.from_pandas_dataframe(result, 'interactor_A', 'interactor_B', [ 'score', 'nscore', 'fscore', 'pscore', 'hscore', 'ascore', 'escore', 'dscore', 'tscore' ]) elarge = [(u, v) for (u, v, d) in G.edges(data=True) if float(d['score']) > 0.7] esmall = [(u, v) for (u, v, d) in G.edges(data=True) if float(d['score']) <= 0.7] pos = nx.random_layout(G) # positions for all nodes nx.draw_networkx_nodes(G, pos, node_size=80) # nodes nx.draw_networkx_edges(G, pos, edgelist=elarge, width=0.5) nx.draw_networkx_edges(G, pos, edgelist=esmall, width=0.5, alpha=0.5, edge_color='b', style='dashed') nx.draw_networkx_labels(G, pos, font_size=3, font_family='sans-serif') # labels plt.axis('off') plt.savefig(identifier + "_weighted_graph.png", dpi=500) # save as png plt.show() # display
def moran(data, *args): dist = pd.DataFrame( pd.read_csv(PATH + '/processed/%s/distance_area/dist_matrix_adm%s.csv' % (country, list(args[0])[-1]))) response = data.groupby(args[0])[args[1]].mean().reset_index().dropna() non_missing = np.array(response[args[0]]) dist = dist[dist['Source'].isin(non_missing)] dist = dist[dist['Target'].isin(non_missing)] G = nx.from_pandas_dataframe(dist, 'Source', 'Target', 'Distance_km') H = nx.adjacency_matrix(G, weight='Distance_km').todense() H[H > 500] = 0 w = np.reciprocal(H + 1) response = np.array(response[args[1]]) x_bar = np.mean(response) adms = len(w) c = np.zeros((adms, adms)) for i in range(adms): for j in range(adms): c[i, j] = (response[i] - x_bar) * (response[j] - x_bar) mysum = np.sum(np.multiply(c, w)) s2 = np.sum(np.power(response - x_bar, 2)) / adms sumw = np.sum(w) I = mysum / (s2 * sumw) print I
def build_file_move_graph(file_frame): move_frame = file_frame[file_frame.move] move_graph = nx.from_pandas_dataframe( move_frame, source='file_path1', target='file_path2', edge_attr='hexsha', create_using=nx.DiGraph()) return move_graph
def build_graph(data): ### Construct graph G from df # Adding the weight to prepare for nx df = data.groupby(['A_NUMBER', 'B_NUMBER'])['DURATION'].sum().reset_index() df['DURATION_SEC'] = df['DURATION'] / np.timedelta64(1, 's') G = nx.from_pandas_dataframe(df, 'A_NUMBER', 'B_NUMBER', ['DURATION_SEC']) return G
def __init__(self, path): '''Constructor''' edges = pd.read_csv(path, header=0) self.l = edges.columns.values.tolist() self.l.remove('source') self.l.remove('target') self.G = nx.from_pandas_dataframe(edges, 'source', 'target', self.l, create_using=nx.DiGraph())
def find_centrality(df, cent_type='betweenness', keep_thresh=0.5): df_b = df.copy() df_b[(df.abs() < keep_thresh)] = 0 #eliminate edges that are too weak labels = list(df_b.index) temp = abs(df_b.copy()) temp.insert(0, 'var1', labels) df_b = pandas.melt(temp, 'var1', var_name='var2', value_name='edge') df_b = df_b.loc[(df_b['edge'] > 0), :] # take only those edge pairs that made the cut df_g = networkx.from_pandas_dataframe( df_b, 'var1', 'var2', 'edge') # takes a list of valid edges if cent_type == 'betweenness': centrality = networkx.betweenness_centrality(df_g) elif cent_type == 'degree': centrality = networkx.degree_centrality(df_g) elif cent_type == 'closeness': centrality = networkx.closeness_centrality(df_g) elif cent_type == 'eigenvector': centrality = networkx.eigenvector_centrality(df_g) else: print('error, unknown centrality') return -1 centrality_df = pandas.DataFrame.from_dict(centrality, orient='index') centrality_df.sort_values(0, axis=0, ascending=False, inplace=True) centrality_df = centrality_df.transpose() return centrality_df, df_g
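# --- find_centrality usage sketch on a small symmetric correlation table
# (values invented; the pandas/networkx module aliases match the function above):
import pandas

corr = pandas.DataFrame([[1.0, 0.8, 0.2],
                         [0.8, 1.0, 0.6],
                         [0.2, 0.6, 1.0]],
                        index=['x', 'y', 'z'], columns=['x', 'y', 'z'])
cent_df, cent_graph = find_centrality(corr, cent_type='degree', keep_thresh=0.5)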
def edgelist_dimacs_graph(orig_graph, peo_h, prn_tw=False): fname = orig_graph gname = os.path.basename(fname).split(".") gname = sorted(gname, reverse=True, key=len)[0] if ".tar.bz2" in fname: from tdec.read_tarbz2 import read_tarbz2_file edglst = read_tarbz2_file(fname) df = pd.DataFrame(edglst, dtype=int) G = nx.from_pandas_dataframe(df, source=0, target=1) else: G = nx.read_edgelist(fname, comments="%", data=False, nodetype=int) # print "...", G.number_of_nodes(), G.number_of_edges() # from numpy import max # print "...", max(G.nodes()) ## to handle larger 300K+ nodes with much larger labels N = max(G.nodes()) M = G.number_of_edges() # +++ Graph Checks if G is None: sys.exit(1) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) graph_checks(G) # --- graph checks G.name = gname # print "...", G.number_of_nodes(), G.number_of_edges() if G.number_of_nodes() > 500 and not prn_tw: return (nx_edges_to_nddgo_graph_sampling(G, n=N, m=M, peo_h=peo_h), gname) else: return (nx_edges_to_nddgo_graph(G, n=N, m=M, varel=peo_h), gname)
def DataFrame_to_lG(df, directed=False, source='source', target='target', time='time', weight=None): ti, tf = df[time].min(), df[time].max() + 1 if directed: graph = nx.DiGraph else: graph = nx.Graph lG = [] for t in range(ti, tf): cut = df[df[time] == t] if cut.shape[0] == 0: G = graph() else: G = nx.from_pandas_dataframe(cut, source=source, target=target, edge_attr=weight, create_using=graph()) lG.append(G) return lG
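# --- DataFrame_to_lG usage sketch: one snapshot graph per integer time step
# (column names follow the function defaults; rows invented):
import pandas as pd

events = pd.DataFrame({'source': ['a', 'b', 'a'],
                       'target': ['b', 'c', 'c'],
                       'time': [0, 0, 1]})
snapshots = DataFrame_to_lG(events)  # [graph at t=0, graph at t=1]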
def extract_groups(m2m): """Extracts a list of groups from a social network varying through time. Groups are defined as connected components of the social graph at a given time bin. Parameters ---------- m2m : pd.DataFrame The social network, for instance, member-to-member bluetooth proximity data. It must have the following columns: 'datetime', 'member1', and 'member2'. Returns ------- pd.DataFrame : The groups, as a sets of members with datetime. """ groups = m2m.groupby('datetime').apply(lambda df: pd.Series([ frozenset(c) for c in nx.connected_components( nx.from_pandas_dataframe(df.reset_index(), 'member1', 'member2')) ])) groups.name = 'members' return groups.reset_index()[['datetime', 'members']]
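# --- extract_groups usage sketch; column names match the docstring and the
# proximity records are invented (two time bins with one and two components):
import pandas as pd

m2m = pd.DataFrame({'datetime': ['10:00', '10:05', '10:05'],
                    'member1': ['alice', 'bob', 'dave'],
                    'member2': ['bob', 'carol', 'eve']})
print(extract_groups(m2m))  # one row per (datetime, connected component)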
def init_simple(): global max_depth global T global grid_dim_x global grid_dim_y global base global route_length global total_distance global total_reward total_distance = 0 total_reward = 0 T = 4 # Num. of iterations grid_dim_x = 1 grid_dim_y = 2 base = 0.0 # Starting node number route_length = 10 # Distance limit global graph global dist global sigma1 global avg_strat1 global regret1 global route df = pd.read_csv("simple/nodes_list.txt", sep=" ") dist = pd.read_csv("simple/dist_simple.gop", sep=" ", header=None) graph = \ nx.from_pandas_dataframe(df, source='node_from', target='node_to', edge_attr=['distance', 'animal_density', 'grid_cell_x', 'grid_cell_y']) sigma1 = [[1 / (grid_dim_x * grid_dim_y)] * grid_dim_y] * grid_dim_x avg_strat1 = [[0] * grid_dim_y] * grid_dim_x regret1 = [[0] * grid_dim_y] * grid_dim_x route = []
def simple_visualization (airport_df, routes_df):
    if (airport_df is None) or (routes_df is None):
        print("Data cannot be retrieved and read")
    else:
        airport_us = airport_df[(airport_df.Country == "United States")][['Name','Lat', 'Long', 'IATA', 'ICAO']]
        us_airport_ix = airport_us.index.values
        routes_us = routes_df[(routes_df['Source Airport ID'].isin(us_airport_ix)) &
                              (routes_df['Dest Airport ID'].isin(us_airport_ix))] #extract routes that fly from AND to the USA
        routes_us = pd.DataFrame(routes_us.groupby(['Source Airport', 'Dest Airport']).size().reset_index(name='counts'))
        # to find the number of flights in and out of an airport,
        # count the rows in which the airport occurs in either of the 2 columns
        counts = routes_us['Source Airport'].append(routes_us.loc[routes_us['Source Airport'] != routes_us['Dest Airport'], 'Dest Airport']).value_counts()
        # create a data frame of positions based on the names in counts
        counts = pd.DataFrame({'IATA': counts.index, 'total_flight': counts})
        pos_data = counts.merge(airport_us, on = 'IATA')

        # Create graph
        graph = nx.from_pandas_dataframe(routes_us, source = 'Source Airport', target = 'Dest Airport',
                                         edge_attr = 'counts', create_using = nx.DiGraph())

        # default graph using Networkx's inbuilt graph tools
        plt.figure(figsize = (10,9))
        nx.draw_networkx(graph)
        plt.savefig("./images/networkx_basemap/map_0.png", format = "png", dpi = 300)
        plt.show()

        # Set up base map
        plt.figure(figsize=(15,20))
        m = Basemap(
                projection='merc',
                llcrnrlon=-180,
                llcrnrlat=10,
                urcrnrlon=-50,
                urcrnrlat=70,
                lat_ts=0,
                resolution='l',
                suppress_ticks=True)

        # import long lat as m attribute
        mx, my = m(pos_data['Long'].values, pos_data['Lat'].values)
        pos = {}
        for count, elem in enumerate (pos_data['IATA']):
            pos[elem] = (mx[count], my[count])

        # draw nodes and edges and overlay on basemap
        nx.draw_networkx_nodes(G = graph, pos = pos, nodelist = graph.nodes(),
                               node_color = 'r', alpha = 0.8,
                               node_size = [counts['total_flight'][s]*3 for s in graph.nodes()])
        nx.draw_networkx_edges(G = graph, pos = pos, edge_color='g',
                               width = routes_us['counts']*0.75, alpha=0.2, arrows = False)

        m.drawcountries(linewidth = 3)
        m.drawstates(linewidth = 0.2)
        m.drawcoastlines(linewidth=3)
        plt.tight_layout()
        plt.savefig("./images/networkx_basemap/map_2.png", format = "png", dpi = 300)
        plt.show()

        print("successful visualization")
    return 0
def test_from_dataframe_multi_attr(self,): Gtrue = nx.Graph( [ ("E", "C", {"cost": 9, "weight": 10}), ("B", "A", {"cost": 1, "weight": 7}), ("A", "D", {"cost": 7, "weight": 4}), ] ) G = nx.from_pandas_dataframe(self.df, 0, "b", ["weight", "cost"]) self.assert_equal(G, Gtrue)
def calculate(words): # instantiate a dictionary to later be filled with word:miscores wc = defaultdict(float) frames = [] print("...it will take a while. Wait a sec...") for word in words: payload = {'searchstring': word.encode('ascii'), 'searchpositional':'word', 'searchpostag':'all', 'contextsize':'60c', 'sort2':'right', 'terminate':'100', 'searchtype':'coll', 'mistat':'on', 'collocspanleft':'2', 'collocspanright':'2', 'collocfilter':'noun'} r = requests.get("http://clic.cimec.unitn.it/cgi-bin/cqp/cqp.pl?corpuslist=WEBBIT", params=payload) soup = BeautifulSoup(r.content, 'lxml') # parse the html table and extract words and miscores. Add scores temp = [] for tr in soup.find_all('tr')[1:]: tds = tr.find_all('td') word = tds[0].text.split('~~')[1] mi = float(tds[4].text) wc[word] += mi temp.append(map(lambda x:x.text,tds[0:])) x = pd.DataFrame(temp) df = pd.DataFrame() df['coll'] = x.ix[0:,0].apply(lambda x: x.split('~~')[1]) df['word'] = x.ix[0:,0].apply(lambda x: x.split('~~')[0]) df['mi'] = x.ix[0:,4] frames.append(df) #sort the results in decreasing order results = [] for w in sorted(wc, key=wc.get, reverse=True): results.append((w, wc[w])) #spit out the top result. If using ipython you can check the rest of the list by tiping `results` #viz part results_df = pd.concat(frames) G=nx.from_pandas_dataframe(results_df, 'word','coll',['mi']) mat = nx.adjacency_matrix(G).todense() viz = lgn.force(mat) vid = viz.id print(vid) url = '<iframe src="http://public.lightning-viz.org/visualizations/'+vid+'/iframe/" width=100% height=400px>' return (results[0][0].strip(),url)
def make_graph(cutdf): """ Convert dataframe of a list of edges into a networkx graph object Args: cutdf : pandas dataframe containing list of edges with features 'source' and 'target' containing node numbers Returns: networkx graph object """ g = nx.from_pandas_dataframe(cutdf, 'source', 'target') return g
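# --- make_graph usage sketch (edge list invented):
import pandas as pd

cutdf = pd.DataFrame({'source': [0, 1, 2], 'target': [1, 2, 0]})
g = make_graph(cutdf)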
def weightGraph(self, datacontacts, mi_threshold, time_treshold=0.6):
    if len(self.mol.get('resid', 'name CA')) != len(self.resids):
        raise Exception('The length of the protein doesn\'t match the Mutual Information data')
    contactcat = np.concatenate(datacontacts.dat)
    contacts_matrix = np.zeros([len(self.resids), len(self.resids)])
    for i in range(contactcat.shape[1]):
        counter = np.count_nonzero(contactcat[:, i])
        resid1 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][0]]]
        resid2 = self.residmap[self.mol.resid[datacontacts.description.atomIndexes[i][1]]]
        contacts_matrix[resid1][resid2] = counter

    self.graph_array = np.zeros([contacts_matrix.shape[0], contacts_matrix.shape[0]])
    mask = (self.mi_matrix > mi_threshold) & (contacts_matrix > (time_treshold * contactcat.shape[0]))
    self.graph_array[mask] = self.mi_matrix[mask]

    intermed = []
    for source in range(self.graph_array.shape[0]):
        for target in range(source, self.graph_array.shape[1]):
            if self.graph_array[source, target] != 0 and target > source:
                intermed.append(
                    [int(self.resids[source]), int(self.resids[target]), float(self.graph_array[source, target])])

    import pandas as pd
    import networkx as nx
    from sklearn.cluster import SpectralClustering

    # build the edge table without shadowing the pandas module name
    edges = pd.DataFrame(intermed, columns=['source', 'target', 'weight'])
    edges[['source', 'target']] = edges[['source', 'target']].astype(int)
    edges['weight'] = edges['weight'].astype(float)

    G = nx.from_pandas_dataframe(edges, 'source', 'target', ['weight'])

    ## setSegment
    segids = self.mol.get('segid', 'name CA')
    seg_res_dict = {key: value for (key, value) in zip(self.resids, segids)
                    if np.any(edges.loc[(edges['source'] == key)].index) or
                    np.any(edges.loc[(edges['target'] == key)].index)}
    nx.set_node_attributes(G, 'Segment', seg_res_dict)

    ## set
    if not nx.is_connected(G):
        G = max(nx.connected_component_subgraphs(G), key=len)
    flow_cent = nx.current_flow_betweenness_centrality(G, weight='weight')
    nx.set_node_attributes(G, 'flowcent', flow_cent)
    Spectre = SpectralClustering(n_clusters=10, affinity='precomputed')
    model = Spectre.fit_predict(self.graph_array)
    model = model.astype(float)
    spectral_dict = {key: value for (key, value) in zip(self.resids, model) if key in G.nodes()}
    nx.set_node_attributes(G, 'spectral', spectral_dict)
    self.graph = G
def corr_to_graph(roi_corrs, copy_corrs=False): """ >>> import pandas as pd >>> import numpy as np >>> corrs = pd.DataFrame(np.random.rand(2,2)) >>> corrs.index = ['A', 'B'] >>> corrs.columns = ['A', 'B'] >>> graph = corr_to_graph(corrs) >>> ab = graph['A']['B'] >>> wt, prox, dist = ab['weight'], ab['proximity'], ab['distance'] >>> assert wt == corrs['B']['A'] #upper triangular >>> assert prox == wt >>> assert dist == 1 - wt >>> assert len(graph) == 2 """ roi_corrs = create_convertible_corr_df(roi_corrs, copy_corrs) return nx.from_pandas_dataframe(roi_corrs, 'source', 'target', edge_attr=['distance', 'proximity', 'weight'])
def test_from_dataframe(self, ):
    # Pandas DataFrame
    g = nx.cycle_graph(10)
    G = nx.Graph()
    G.add_nodes_from(g)
    G.add_weighted_edges_from((u, v, u) for u, v in g.edges())
    edgelist = nx.to_edgelist(G)
    source = [s for s, t, d in edgelist]
    target = [t for s, t, d in edgelist]
    weight = [d['weight'] for s, t, d in edgelist]
    import pandas as pd
    edges = pd.DataFrame({'source': source, 'target': target, 'weight': weight})
    GG = nx.from_pandas_dataframe(edges, edge_attr='weight')
    assert_nodes_equal(sorted(G.nodes()), sorted(GG.nodes()))
    assert_edges_equal(sorted(G.edges()), sorted(GG.edges()))
    GW = nx.to_networkx_graph(edges, create_using=nx.Graph())
    assert_nodes_equal(sorted(G.nodes()), sorted(GW.nodes()))
    assert_edges_equal(sorted(G.edges()), sorted(GW.edges()))
def read_inter_RRI(inter_RRI=None, filter_rRNA=False, support_read_num=2, only_mRNA_lncRNA=False): if inter_RRI is None: inter_RRI = '/Share/home/zhangqf7/gongjing/zebrafish/data/paris/shi-zp-2/27-DG.inter.element.txt' df_inter_RRI = pd.read_csv(inter_RRI, header=None, sep='\t') if filter_rRNA: df_inter_RRI = df_inter_RRI[(df_inter_RRI[13] != 'rRNA') & (df_inter_RRI[14] != 'rRNA')] if only_mRNA_lncRNA: only_mRNA_lncRNA_index = (df_inter_RRI[13].isin(['mRNA', 'lncRNA'])) & (df_inter_RRI[14].isin(['mRNA', 'lncRNA'])) df_inter_RRI = df_inter_RRI[only_mRNA_lncRNA_index] header_ls = ['Group', 'lchr', 'lstrand', 'lstart', 'lend', 'rchr', 'rstrand', 'rstart', 'rend', 'support', 'lcount', 'rcount', 'score', 'ltype', 'rtype', 'RRI_type', 'lcontext', 'rcontext'] df_inter_RRI.columns = header_ls df_inter_RRI = df_inter_RRI[df_inter_RRI['support'] >= support_read_num] print df_inter_RRI.head() nx_inter_RRI = nx.from_pandas_dataframe(df_inter_RRI, 'lchr', 'rchr', edge_attr=['support', 'Group']) print "\nread: %s"%(inter_RRI) print nx.info(nx_inter_RRI) print return nx_inter_RRI, df_inter_RRI
def create_nwrk(self, nodes_cols, attribs_cols): """Return Ortho_Network.nwrk upon pandas.DataFrame. Parameters ------- nodes_cols: list Columns to take as nodes. attribs_cols: list Columns to take as attributes. Returns ------- P_CRAWLER.Ortho_Network.nwrk Interactions-based network. networkx.classes.graph.Graph derivate. """ self.nwrk = nx.from_pandas_dataframe(self.inter_df, nodes_cols[0], nodes_cols[1], attribs_cols)
def main():
    parser = ArgumentParser()
    parser.add_argument("-i", "--infile", type=str,
                        help="Table with one row per link between families, columns: ['Node1','Node2']. \
                        If not specified the program reads from stdin.")
    parser.add_argument("--maxlink", default=6, type=int,
                        help="Maximum number of allowed outgoing links (edges) for a single protein family. Defaults to 6")
    parser.add_argument("--minoc", default=10, type=int,
                        help="Minimum number of occurrences for linking two families. Defaults to 10")
    parser.add_argument("--trimmed_out", type=str,
                        help="Write trimmed families to file")
    args = parser.parse_args()

    if args.infile:
        linkdf = pd.read_csv(args.infile, sep="\t", header=0)
    else:
        linkdf = pd.read_csv(sys.stdin, sep="\t", header=0)
    linkdf.fillna("", inplace=True)

    oc = count_occurrences(linkdf)

    ## Create graph from data frame
    g = nx.from_pandas_dataframe(linkdf, source="Node1", target="Node2")
    g.remove_node('')

    ## Trim nodes by outgoing edges and edges by occurrence
    [gt, trimmed_nodes, trimmed_edges] = trim_graph(g, args.maxlink, args.minoc, oc)
    if args.trimmed_out:
        pd.DataFrame(trimmed_nodes).to_csv(args.trimmed_out, sep="\t", index=False, header=False)
    logging.info("Removed " + str(len(trimmed_edges)) + " links due to low occurrence")
    logging.info("Removed " + str(len(trimmed_nodes)) + " families with too many links (" + str(len(gt.nodes())) + " remaining)")

    ## Create clusters for graph
    clusters = cluster(gt)
    logging.info(str(len(clusters)) + " clusters created")
    cdf = pd.DataFrame(clusters).T
    ## Sort by number of families in cluster
    cdf.sort_values("num", ascending=False, inplace=True)
    cdf.index = list(range(1, len(cdf) + 1))

    ## Write clusters sorted by size
    write(cdf)
def advanced_visualization (airport_df, routes_df): if (airport_df is None) or (routes_df is None): print ("Data cannot be retrieved and read") else: airport_us = airport_df[(airport_df.Country == "United States") & (airport_df.Lat > 25) & (airport_df.Lat < 50) & (airport_df.Long > -130) & (airport_df.Long < -60)] us_airport_ix = airport_us.index.values routes_us = routes_df[(routes_df['Source Airport ID'].isin(us_airport_ix)) & (routes_df['Dest Airport ID'].isin(us_airport_ix))] #extract routes that flyies from AND to USA routes_us = pd.DataFrame(routes_us.groupby(['Source Airport', 'Dest Airport']).size().reset_index(name='counts')) # to find number of flights in and out of an airport # it is similar to find number of rows in which each airport occur in either one of the 2 columns counts = routes_us['Source Airport'].append(routes_us.loc[routes_us['Source Airport'] != routes_us['Dest Airport'], 'Dest Airport']).value_counts() # create a data frame of position based on names in count counts = pd.DataFrame({'IATA': counts.index, 'total_flight': counts}) pos_data = counts.merge(airport_us, on = 'IATA') # Create graph graph = nx.from_pandas_dataframe(routes_us, source = 'Source Airport', target = 'Dest Airport', edge_attr = 'counts',create_using = nx.DiGraph()) # Set up base map plt.figure(figsize=(15,20)) m = Basemap( projection='merc', llcrnrlon=-180, llcrnrlat=10, urcrnrlon=-50, urcrnrlat=70, lat_ts=0, resolution='l', suppress_ticks=True) # import long lat as m attribute mx, my = m(pos_data['Long'].values, pos_data['Lat'].values) pos = {} for count, elem in enumerate (pos_data['IATA']): pos[elem] = (mx[count], my[count]) # draw nodes and edges and overly on basemap nx.draw_networkx_nodes(G = graph, pos = pos, nodelist = [x for x in graph.nodes() if counts['total_flight'][x] >= 100], node_color = 'r', alpha = 0.8, node_size = [counts['total_flight'][x]*4 for x in graph.nodes() if counts['total_flight'][x] >= 100]) nx.draw_networkx_labels(G = graph, pos = pos, font_size=10, labels = {x:x for x in graph.nodes() if counts['total_flight'][x] >= 100}) nx.draw_networkx_nodes(G = graph, pos = pos, nodelist = [x for x in graph.nodes() if counts['total_flight'][x] < 100], node_color = 'b', alpha = 0.6, node_size = [counts['total_flight'][x]*4 for x in graph.nodes() if counts['total_flight'][x] < 100]) nx.draw_networkx_edges(G = graph, pos = pos, edge_color = 'g', width = routes_us['counts']*0.75, alpha=0.06, arrows = False) m.drawcountries(linewidth = 3) m.drawstates(linewidth = 0.2) m.drawcoastlines(linewidth=1) m.fillcontinents(alpha = 0.3) line1 = mlines.Line2D(range(1), range(1), color="white", marker='o', markerfacecolor="red") line2 = mlines.Line2D(range(1), range(1), color="white", marker='o',markerfacecolor="blue") line3 = mlines.Line2D(range(1), range(1), color="green", marker='',markerfacecolor="green") plt.legend((line1, line2, line3), ('Large Airport > 100 routes', 'Smaller airports', 'routes'), loc=4, fontsize = 'xx-large') plt.title("Network graph of flight routes in the USA", fontsize = 30) #m.bluemarble() plt.tight_layout() plt.savefig("./images/networkx_basemap/map_3.png", format = "png", dpi = 300) plt.show() print ("successful visualization") return 0
d = pd.DataFrame({'recipients': recipient,'score': score}) d = d.sort_values(['score'], ascending=False) predictedPerson = d['recipients'].iloc[0] PredictedRecipient.append(predictedPerson) df['PredictedRecipient'] = PredictedRecipient finalDf.append(df) finalDf = pd.concat(finalDf, axis=0) ############# Compute Accuracy dPerformance = compute_perfromance(finalDf) ## Networking aspect G = nx.from_pandas_dataframe(dFeatures, 'sender', 'receiver') plt.figure(figsize=(20,20)) pos = nx.spring_layout(G, k=.1) nx.draw_networkx(G, pos, node_size=25, node_color='red', with_labels=True, edge_color='blue') plt.show()
# pathway_df_n2e = helper.convert_edges_to_node(pathway_df) # node_features_df = '../output/features_{}'.format(pathway_name) # to get which nodes are sources and targets pathway_nodes_df = pd.read_csv('../data/pathways/{}-nodes.txt'. format(pathway_name), delimiter='\t') pathway_tf_nodes = pathway_nodes_df.ix[ pathway_nodes_df['node_symbol'] == 'tf'] pathway_receptor_nodes = pathway_nodes_df.ix[ pathway_nodes_df['node_symbol'] == 'receptor'] # create the full interactome as graph g = nx.from_pandas_dataframe(interactome_e2n, source='#tail', target='head', create_using=nx.DiGraph()) # add super nodes g.add_nodes_from(['super_tf', 'super_receptor']) # connect super sources receptor_list = list(pathway_receptor_nodes["#node"]) add_receptor_edges = [('super_receptor', n) for n in receptor_list] g.add_edges_from(add_receptor_edges) # connect super targets tf_list = list(pathway_tf_nodes["#node"]) add_tf_edges = [(n, 'super_tf') for n in tf_list] g.add_edges_from(add_tf_edges)
# concatenate all these edges from all these events
# and write them to an edgelist.
# append multiple items in one go instead of this
e = e1.append(e2)
e = e.append(e3)
e.to_csv('edges.csv')

# I previously made the perp list by concatenating the perp1 perp2 and perp3 columns
# then taking unique strings
# so supposing we are starting from the point where we had perp and edge lists saved as files already
perp_list = pd.read_csv("perp_list.csv")
edges = pd.read_csv("edges.csv")

# make an empty graph
G = nx.Graph()
# feed it edges
# note: from_pandas_dataframe also needs the source/target column names of the
# edge table, which are not shown here
edgelist = nx.from_pandas_dataframe(edges)
# feed it nodes
G.add_nodes_from(perp_list)

# [Commenting this code so late that I forgot what this does. To be filled in later.]
test = e.replace(to_replace=perp_list, )

# quantification of categorical traits
# recalling what we have...
print("Total rows: {0}".format(len(data)))
print(list(data))

# quantify the traits. 1 is least governmental, 3 is most.
data = data.replace(to_replace='Private Citizens & Property', value=1)
data = data.replace(to_replace='Journalists & Media', value=1)
data = data.replace(to_replace='Educational Institution', value=1)
data = data.replace(to_replace='Abortion Related', value=1)
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d={0: {1: {'weight':1}}} # dict-of-dicts single edge (0,1)
    >>> G=nx.Graph(d)

    instead of the equivalent

    >>> G=nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

       Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         list of edges
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph
       Use specified graph for result.  Otherwise a new graph is created.

    multigraph_input : bool (default False)
      If True and data is a dict_of_dicts,
      try to create a multigraph assuming dict_of_dict_of_lists.
      If data and create_using are both multigraphs then create
      a multigraph from a multigraph.
    """
    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(data.adj,
                                        create_using=create_using,
                                        multigraph_input=data.is_multigraph())
            if hasattr(data, 'graph'):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, 'node'):  # data.node should be dict-like
                result.node.update((n, dd.copy()) for n, dd in data.node.items())
            return result
        except:
            raise nx.NetworkXError("Input is not a correct NetworkX graph.")

    # pygraphviz agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except:
            raise nx.NetworkXError("Input is not a correct pygraphviz graph.")

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data, create_using=create_using,
                                      multigraph_input=multigraph_input)
        except:
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except:
                raise TypeError("Input is not known type.")

    # list or generator of edges
    if (isinstance(data, list) or isinstance(data, tuple) or
            hasattr(data, 'next') or hasattr(data, '__next__')):
        try:
            return from_edgelist(data, create_using=create_using)
        except:
            raise nx.NetworkXError("Input is not a valid edge list")

    # Pandas DataFrame
    try:
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            try:
                return nx.from_pandas_dataframe(data, create_using=create_using)
            except:
                msg = "Input is not a correct Pandas DataFrame."
                raise nx.NetworkXError(msg)
    except ImportError:
        msg = 'pandas not found, skipping conversion test.'
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy
        if isinstance(data, numpy.matrix) or \
                isinstance(data, numpy.ndarray):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.")
    except ImportError:
        warnings.warn('numpy not found, skipping conversion test.',
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy
        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(data, create_using=create_using)
            except:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type.")
    except ImportError:
        warnings.warn('scipy not found, skipping conversion test.',
                      ImportWarning)

    raise nx.NetworkXError(
        "Input is not a known data type for conversion.")
import pandas as pd import networkx as nx import numpy as np import sys if sys.version_info.major < 3: sys.exit("Python 3.x or above required") if float(nx.__version__) < 1.11: sys.exit("Networkx 1.11 or above required") # Read the edge list and convert it to a network edges = pd.read_csv("all_edges.csv") F = nx.from_pandas_dataframe(edges, 'node_1', 'node_2') # Read node lists officers = pd.read_csv("Officers.csv", low_memory=False).set_index('node_id') intermediaries = pd.read_csv("Intermediaries.csv").set_index('node_id') addresses = pd.read_csv("Addresses.csv", low_memory=False).set_index('node_id') entities = pd.read_csv("Entities.csv", low_memory=False).set_index('node_id') # Combine the node lists into one dataframe officers["type"] = "officer" intermediaries["type"] = "intermediary" addresses["type"] = "address" entities["type"] = "entity" all_nodes = pd.concat([officers, intermediaries, addresses, entities]) # Do some cleanup of names all_nodes['name'] = all_nodes['name'].str.upper() all_nodes['name'] = all_nodes['name'].str.strip() # Ensure that all "Bearers" do not become a single node
# add plot.ly plotting options traces.append(make_scatter(lon_cc,lat_cc)) return traces def get_coastline_traces(): poly_paths = m.drawcoastlines().get_paths() # coastline polygon paths N_poly = 91 # use only the 91st biggest coastlines (i.e. no rivers) return polygons_to_traces(poly_paths, N_poly) traces_cc = get_coastline_traces() ############################## FRAME ABOVE FOR MAP, FRAME BELOW FOR NODES G=nx.from_pandas_dataframe(latLonPopulated_RT, source='tweetId',target='retweeter') pos = {} color = {} for i,tweet in latLonPopulated_RT.iterrows(): pos[tweet['tweetId'].upper()] = np.asarray([tweet['x'],tweet['y']]) pos[tweet['retweeter'].upper()] = np.asarray([tweet['x'],tweet['y']]) color[tweet['tweetId'].upper()] = tweet['node_color'] color[tweet['retweeter'].upper()] = tweet['node_color'] for n,p in pos.iteritems(): G.node[n]['pos'] = p for n,c in color.iteritems(): G.node[n]['color'] = c
def MedianDegree(d): G = nx.from_pandas_dataframe(d, 'actor', 'target', ['created_time']) G = sorted(list(G.degree().values())) return np.median(G)
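# --- MedianDegree usage sketch; assumes networkx 1.x, where G.degree()
# returns a dict (the activity frame below is invented):
import pandas as pd

d = pd.DataFrame({'actor': ['u1', 'u2', 'u1'],
                  'target': ['u2', 'u3', 'u3'],
                  'created_time': ['t0', 't1', 't2']})
print(MedianDegree(d))  # 2.0 for this toy triangle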
import pandas as pd import MySQLdb as mdb from a_Model import ModelIt from events import returnTopEvents from geocode import latlon from rankingSim import artistPath import networkx as nx import unicodedata import HTMLParser from secrets import username, host, dbname, pswd engine = create_engine('mysql://%s:%s@localhost/%s'%(username,pswd,dbname)) con = None con = mdb.connect('localhost', username, pswd, dbname) simArtists = pd.read_sql_table('relArtistFull',con='mysql://%s:%s@localhost/%s'%(username,pswd,dbname)) g = nx.from_pandas_dataframe(simArtists,'Artist','RelArtist') #@app.route('/') #@app.route('/index/') #def index(): # user = { 'nickname': 'Dan' } # fake user # return render_template("index.html", # title = 'Home', # user = user) @app.errorhandler(500) def page_not_found(e): return render_template('error.html'), 500
dataTime = dataTime0.set_index("vID") #index_dataTime = dataTime.index.values #print dataTime perm = list(permutations(list_vIDs,2)) #print perm dist = [((((dataTime.loc[p[0],'gX'] - dataTime.loc[p[1],'gX']))**2) + (((dataTime.loc[p[0],'gY'] - dataTime.loc[p[1],'gY']))**2))**0.5 for p in perm] dataDist = pd.DataFrame(dist , index=perm, columns = {'dist'}) #Create the fields vID and To dataDist['FromTo'] = dataDist.index dataDist['From'] = dataDist.FromTo.str[0] dataDist['To'] = dataDist.FromTo.str[1] #I multiply by 100 in order to scale the number dataDist['weight'] = (1/dataDist.dist)*100 #Delete the intermediate FromTo field dataDist = dataDist.drop('FromTo', 1) graph = nx.from_pandas_dataframe(dataDist, 'From','To',['weight']) save_graph(graph,'D:\\zzzLola\\PhD\\DataSet\\US101\\coding\\graphs\\000_my_graph+%i.png' %time)
# -*- coding: utf-8 -*- #%% read in data - use a pandas data frame just for convenience import pandas as pd data = pd.read_table("./data/HW1_4.txt", sep = " ", header = None, names = ['vx', 'vy', 'weight']) # %% networkx section import networkx as nx # use networkx to create graph object graph = nx.from_pandas_dataframe(data, source = "vx", target = "vy", edge_attr = "weight") # get the dijkstra shortest path path_dijkstra = nx.all_pairs_dijkstra_path(graph) path_1_6_dijkstra = path_dijkstra[1][6] # get the all-pairs shortest path path_all_pairs = nx.all_pairs_shortest_path(graph) path_1_6_all_pairs = path_all_pairs[1][6]
def create_graph(dataframe, filename): graph = nx.from_pandas_dataframe(dataframe, 'x', 'y', 'weight') add_attribute(graph) nx.write_graphml(graph, filename+'.graphml')
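# --- create_graph usage sketch; add_attribute is an external helper from the
# original project, so this only runs where it is importable (data and output
# name invented):
import pandas as pd

df = pd.DataFrame({'x': [1, 2], 'y': [2, 3], 'weight': [0.5, 1.5]})
create_graph(df, 'demo')  # writes demo.graphml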