Example #1
def network_measures(graphname):

    H = nx_old.read_gml(graphname)

    ## Calculate the degree of nodes in the network

    for u, v in nx.degree(H).items():
        H.node[u]['degree'] = int(v)

    ## Compute betweenness centrality for nodes

    betweenness_dictionary = nx.betweenness_centrality(H)
    for u, v in betweenness_dictionary.items():
        H.node[u]['betweenness_centrality'] = float(v)

    ## Calculate the k-shell index of each node

    list_conn = []
    for node in H.nodes():
        list_conn.append(len(H.neighbors(node)))
    max_connect = max(list_conn)

    H.remove_edges_from(H.selfloop_edges())

    ## assign each node the k of the shell it belongs to
    for index in range(max_connect + 1):
        shell = nx.algorithms.core.k_shell(H, k=index)
        if len(shell) > 0:
            for node in shell:
                H.node[node]['kshell_index'] = int(index)

    nx_old.write_gml(H, str(graphname))
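## A minimal sketch (not part of the pipeline above), assuming NetworkX 1.x:
## nx.core_number returns, for every node, the largest k such that the node
## belongs to the k-core, which is exactly the kshell_index assigned by the
## shell loop in network_measures, computed in a single pass.
def kshell_index_via_core_number(H):
    ## core_number is undefined on graphs with self-loops
    H.remove_edges_from(H.selfloop_edges())
    for node, k in nx.core_number(H).items():
        H.node[node]['kshell_index'] = int(k)
    return H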
Example #2
def spatio_temporal_network_neighbor_ego_effect(gml_file):
    '''
	Compute, for each node, the mean contribution of its neighbors and of its
	ego network, in terms of lines of code added, lines of code removed, and
	total number of commits.
	'''

    G = nx_old.read_gml(str(gml_file))
    list_attr_dv = [
        'lines_of_code_added_sum', 'lines_of_code_added_avg',
        'lines_of_code_removed_sum', 'lines_of_code_removed_mean',
        'total_num_committs'
    ]

    ### neighbors and Ego of a committer
    for x in list_attr_dv:
        print x
        for n in G.nodes():

            try:
                G.node[n]['neigh' + '_' + str(x)] = np.mean(
                    [G.node[k][str(x)] for k in G.neighbors(n)])
                G.node[n]['ego' + '_' + str(x)] = np.mean(
                    [G.node[k][str(x)] for k in nx.ego_graph(G, n)])
            except KeyError:
                print n, G.neighbors(n), len(G.neighbors(n))
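## A toy illustration with assumed data (not from the pipeline): the point of
## the two attributes above is that nx.ego_graph(G, n) contains n itself, so
## the 'ego' mean includes the node's own value while the 'neigh' mean does not.
def _neigh_vs_ego_demo():
    T = nx.Graph()
    T.add_edges_from([('a', 'b'), ('a', 'c')])
    for node, loc in [('a', 10.0), ('b', 2.0), ('c', 4.0)]:
        T.node[node]['lines_of_code_added_sum'] = loc
    ## neighbors of 'a' -> mean over b, c = 3.0
    print np.mean([T.node[k]['lines_of_code_added_sum'] for k in T.neighbors('a')])
    ## ego graph of 'a' -> mean over a, b, c = 5.33...
    print np.mean([T.node[k]['lines_of_code_added_sum'] for k in nx.ego_graph(T, 'a')])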
def network_attribute_node_type(files_csv, gml_file, col_id) : 
	### Add a node attribute marking whether a committer is an independent user, a firm or a distributor
	##col_id : column G for author id and column I for committer id

	H = nx_old.read_gml(gml_file)

	f1 = open(files_csv,'r') ## input file from raw data/processed data

	dict_feature_type_authorid = defaultdict(list)

	for line in f1.readlines()[1:] :
		line = line.strip().split("\t")
		dict_feature_type_authorid[str(line[int(col_id)])] = str(line[5].replace('"',''))

	for n in H.nodes() :

		H.node[n]['node_type'] = dict_feature_type_authorid[str(H.node[n]['label'])]

		if dict_feature_type_authorid[str(H.node[n]['label'])] == "firm" : H.node[n]['node_color_id'] = 1
		if dict_feature_type_authorid[str(H.node[n]['label'])] == "independent" : H.node[n]['node_color_id'] = 2
		if dict_feature_type_authorid[str(H.node[n]['label'])] == "distributor" : H.node[n]['node_color_id'] = 3

	nx_old.write_gml(H, str(gml_file))
Example #4
def write_network_attr_gml(sourcedir, destdir, n1, n2):
    """
	Usage:
	1. Sourcedir : directory where the gml file (of directed graph of file dependencies are located)
	2. destdir : destination directory
	3. n1, n2: range of files we want to read and write

	We add node and edge attribute. Node attributes include:
	- roles and cartographic measures from community detection
	- number of motifs a node is part of
	- frequency of motif types a node is part of

	Edge attribute includes:
	- frequency of motif types an edge is part of

	"""
    globgml = glob.glob(sourcedir + '*.gml')

    for gml_file in globgml[int(n1):int(n2)]:
        print gml_file

        outname = gml_file.split('/')[-1]

        G = nx_old.read_gml(gml_file)
        G = nx_code_complexity.directed_network_measures(G)
        G = check_cycles(G)
        G = check_Motifs(G, 3)
        G = nx_comm_carto.communityroledetectionInfomap(G, 'Infomap')

        nx_old.write_gml(G, destdir + str(outname))
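## check_Motifs above is project-specific and not shown here. As a rough,
## assumed stand-in for "number of motifs a node is part of", this sketch
## counts the connected 3-node subgraphs each node belongs to (brute force,
## so only sensible for small graphs).
def count_3node_motifs(G):
    from itertools import combinations
    U = G.to_undirected()
    for n in U.nodes():
        U.node[n]['num_3node_motifs'] = 0
    for trio in combinations(U.nodes(), 3):
        if nx.is_connected(U.subgraph(trio)):
            for n in trio:
                U.node[n]['num_3node_motifs'] += 1
    return U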
def spatio_temporal_network_neighbor_ego_effect(sourcedir, destdir):

    globgml = glob.glob(sourcedir + '*.gml')

    for gml_file in globgml:

        outname = gml_file.split('/')[-1]

        G = nx_old.read_gml(gml_file)
        list_attr_dv = ['lines_of_code_added_sum']

        ### neighbors and Ego of a committer
        for x in list_attr_dv:

            for n in G.nodes():

                try:
                    G.node[n]['neigh' + '_' + str(x)] = np.mean(
                        [G.node[k][str(x)] for k in G.neighbors(n)])
                    G.node[n]['ego' + '_' + str(x)] = np.mean(
                        [G.node[k][str(x)] for k in nx.ego_graph(G, n)])
                except KeyError:
                    print gml_file, n, G.neighbors(n), len(G.neighbors(n))

        nx_old.write_gml(G, destdir + str(outname))
def neighbor_attribute_node_type(files_csv, gml_file, output_txt) : 

	outf = open(str(output_txt), 'w')
	print >> outf, 'committer_id|neighbor_mean_files|neighbor_std_files|neighbor_med_files|neighbor_mean_files_added|neighbor_std_files_added|neighbor_med_files_added|neighbor_mean_codes_tot|neighbor_std_codes_tot|neighbor_med_codes_tot|neighbor_mean_codes_added|neighbor_std_codes_added|neighbor_med_codes_added|neighbor_mean_net_contr_total|neighbor_std_net_contr_total|neighbor_median_net_contr_total|neighbor_mean_net_contr_avg|neighbor_std_net_contr_avg|neighbor_median_net_contr_avg'

	H = nx_old.read_gml(gml_file) ## committer network gml file

	f1 = open(files_csv,'r') ## Data individual committer file 

	dict_num_files = defaultdict(list); dict_code_added = defaultdict(list) ; dict_net_contr_total = defaultdict(list)
	dict_num_files_added = defaultdict(list); dict_code_total = defaultdict(list) ; dict_net_contr_avg = defaultdict(list)
   
	for line in f1.readlines()[1:] :

		line = line.strip().split("|")
		dict_num_files[str(line[0])] = int(line[1]) ; dict_num_files_added[str(line[0])] = int(line[2])
		dict_code_added[str(line[0])] = int(line[3]); dict_code_total[str(line[0])] = float(line[4])
		dict_net_contr_total[str(line[0])] = float(line[5]); dict_net_contr_avg[str(line[0])] = float(line[8])

	dict_neigh_files = defaultdict(list); dict_neigh_codes_added = defaultdict(list); dict_neigh_contr_total = defaultdict(list)
	dict_neigh_files_added = defaultdict(list); dict_neigh_codes_total = defaultdict(list); dict_neigh_contr_avg = defaultdict(list)

	### neighbors of a committer
	for n in H.nodes() :
		for node in H.neighbors(n) : 
			if str(H.node[node]['label']) in dict_num_files :
				dict_neigh_files[str(H.node[n]['label'])].append(float(dict_num_files[str(H.node[node]['label'])]))
				dict_neigh_codes_added[str(H.node[n]['label'])].append(float(dict_code_added[str(H.node[node]['label'])]))
				dict_neigh_contr_total[str(H.node[n]['label'])].append(float(dict_net_contr_total[str(H.node[node]['label'])]))

				dict_neigh_files_added[str(H.node[n]['label'])].append(float(dict_num_files_added[str(H.node[node]['label'])]))
				dict_neigh_codes_total[str(H.node[n]['label'])].append(float(dict_code_total[str(H.node[node]['label'])]))
				dict_neigh_contr_avg[str(H.node[n]['label'])].append(float(dict_net_contr_avg[str(H.node[node]['label'])]))

	for keys, values in dict_neigh_files.items() :

		mean_files = np.mean(values); median_files = np.median(values); std_files = np.std(values)

		mean_files_added = np.mean(dict_neigh_files_added[str(keys)]); median_files_added = np.median(dict_neigh_files_added[str(keys)]); std_files_added = np.std(dict_neigh_files_added[str(keys)])

		mean_code_tot = np.mean(dict_neigh_codes_total[str(keys)]); median_code_tot = np.median(dict_neigh_codes_total[str(keys)]) ;std_code_tot = np.std(dict_neigh_codes_total[str(keys)])

		mean_code_added = np.mean(dict_neigh_codes_added[str(keys)]); median_code_added = np.median(dict_neigh_codes_added[str(keys)]) ;std_code_added = np.std(dict_neigh_codes_added[str(keys)])

		mean_net_contr_total = np.mean(dict_neigh_contr_total[str(keys)]); median_net_contr_total = np.median(dict_neigh_contr_total[str(keys)]); std_net_contr_total = np.std(dict_neigh_contr_total[str(keys)])

		mean_net_contr_avg = np.mean(dict_neigh_contr_avg[str(keys)]); median_net_contr_avg = np.median(dict_neigh_contr_avg[str(keys)]); std_net_contr_avg = np.std(dict_neigh_contr_avg[str(keys)])


		print >> outf, '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % ( keys, mean_files, std_files, median_files, mean_files_added, std_files_added, median_files_added, mean_code_tot, std_code_tot, median_code_tot, mean_code_added, std_code_added, median_code_added, mean_net_contr_total, std_net_contr_total, median_net_contr_total, mean_net_contr_avg, std_net_contr_avg, median_net_contr_avg)
Example #7
def findCommunitiesInfomap(n1, n2, detectionalgo):
    """
	Partition network with the Infomap algorithm.
	Annotates nodes with 'community' id and return number of communities found.
	"""

    globgml = glob.glob(
        '/media/mukherjee/My Book/OpenStack/WeightedDSM/Idea5_weighted_dsm/nova-*.gml'
    )
    for gml_file in globgml[int(n1):int(n2)]:
        print gml_file

        G = nx_old.read_gml(gml_file)
        infomapWrapper = infomap.Infomap("-d -N10 --silent")

        #print("Building Infomap network from a NetworkX graph...")
        for e in G.edges_iter():
            infomapWrapper.addLink(*e)

    #print("Find communities with Infomap...")
        infomapWrapper.run()

        tree = infomapWrapper.tree

        ## Map each leaf node back to its module (community) index
        communities = {}
        for node in tree.leafIter(1):
            communities[node.originalLeafIndex] = node.moduleIndex()

        nx.set_node_attributes(G, 'community' + '__' + str(detectionalgo),
                               communities)


        for n in G.nodes():
            if n in communities:
                G.node[n]['community_index_label' + '__' +
                          str(detectionalgo)] = str(
                              communities[n]) + '__' + str(n)

        G = rolescartography(G, str(detectionalgo))
        nx_old.write_gml(
            G,
            '/media/mukherjee/My Book/OpenStack/WeightedDSM/Idea5_weighted_dsm_cartography/'
            + str(gml_file.split('/')[-1]))
def gen_null_network_from_raw_data(f1, destdir, n0, n1, n2, n3, nsim, yearstr,
                                   gml1):

    data1 = open(f1, 'r')

    dict_id_name = defaultdict(list)

    G = nx_old.read_gml(gml1)

    for n in G.nodes():
        try:

            committer_name = str(G.node[n]['label'])
            lines_of_code_added_sum = str(G.node[n]['lines_of_code_added_sum'])
            tenure_committer = str(G.node[n]['tenure_committer'])
            avg_MI_committer = str(G.node[n]['avg_MI_committer'])

            dict_id_name[
                committer_name] = lines_of_code_added_sum + '|' + tenure_committer + '|' + avg_MI_committer

        except KeyError:
            continue
def create_output_from_gml(input_gml_file, output_txt) : 
	
	outf = open(str(output_txt), 'w')
	print >> outf, 'id|nrole|degree|betweenness_centrality|community_index_infm|kshell_index|eigenvector_centrality'
	
	H = nx_old.read_gml(input_gml_file)
	## Compute eigenvector centrality of nodes

	eigen_dictionary = nx.eigenvector_centrality(H)
	for u,v in eigen_dictionary.items() :
		H.node[u]['eigenvector_centrality'] = float(v)

	for n in H.nodes() :
		
		node_id = H.node[n]['label']
		node_deg = H.node[n]['degree']
		node_bc = H.node[n]['betweenness_centrality']
		node_com = H.node[n]['community_index_infm']
		node_role = H.node[n]['nrole']
		node_kshell = H.node[n]['kshell_index']
		node_ec = H.node[n]['eigenvector_centrality']

		print >> outf, '%s|%s|%s|%s|%s|%s|%s' % (node_id, node_role, node_deg, node_bc, node_com, node_kshell, node_ec)
Example #10
def measure_developer_attributes(fadd, frem, fcompl, yearstr, gml1):
    '''
	Usage:

	fadd : file containing lines of code added by developers
	frem : file containing lines of code removed by developers
	fcompl : file containing code complexity measures
	yearstr : the year for which to build the network
	gml1 : the gml file where the network with edge and node attributes is stored

	'''

    from datetime import datetime
    import time
    data_add = open(fadd, 'r')
    data_rem = open(frem, 'r')
    G = nx_old.read_gml(gml1)
    data_compl = open(fcompl, 'r')

    dict_committer_code_add = defaultdict(list)
    dict_committer_code_rem = defaultdict(list)
    dict_committer_code_CC = defaultdict(list)
    dict_committer_code_HV = defaultdict(list)
    dict_committer_code_MI = defaultdict(list)
    dict_committer_tenure = defaultdict(list)
    dict_committer_codes_complexity = defaultdict(list)

    ### file with lines of code added info
    for line in data_add.readlines()[1:]:
        line = line.strip().split('|')
        committer_name = line[6].replace(' ', '_').replace('-', '_').replace(
            '__', '_').split('_(')[0]

        if " -" in str(line[8]):
            time_of_commit = str(line[8].split(', ')[1].split(' -')[0])
        if " +" in str(line[8]):
            time_of_commit = str(line[8].split(', ')[1].split(' +')[0])

        year = time_of_commit.split(' ')[2]
        if int(year) == int(yearstr):
            dict_committer_tenure[str(committer_name)].append(time_of_commit)
            dict_committer_code_add[str(committer_name)].append(int(line[0]))

    ### file with lines of code removed info
    for line in data_rem.readlines()[1:]:
        line = line.strip().split('|')
        committer_name = line[6].replace(' ', '_').replace('-', '_').replace(
            '__', '_').split('_(')[0]

        if " -" in str(line[8]):
            time_of_commit = str(line[8].split(', ')[1].split(' -')[0])
        if " +" in str(line[8]):
            time_of_commit = str(line[8].split(', ')[1].split(' +')[0])

        year = time_of_commit.split(' ')[2]
        if int(year) == int(yearstr):
            dict_committer_code_rem[str(committer_name)].append(int(line[0]))

    ##get matching of above two dataset
    dict_commiter_info = defaultdict(list)
    for keys, values in dict_committer_code_add.items():
        if str(keys) in dict_committer_code_rem:

            total_lines_of_code_added = np.sum(values)
            avg_lines_of_code_added = np.mean(values)

            total_lines_of_code_removed = np.sum(
                dict_committer_code_rem[str(keys)])
            avg_lines_of_code_removed = np.mean(
                dict_committer_code_rem[str(keys)])

            time_first_committed = datetime.strptime(
                str(min([x for x in dict_committer_tenure[str(keys)]])),
                '%d %b %Y %H:%M:%S')

            time_last_committed = datetime.strptime(
                str(max([x for x in dict_committer_tenure[str(keys)]])),
                '%d %b %Y %H:%M:%S')

            ## tenure measured in days between first and last commit
            tenure_committer = abs(
                time.mktime(time_first_committed.timetuple()) -
                time.mktime(time_last_committed.timetuple())) / (60.0 * 60 * 24)

            total_num_committs = len(dict_committer_tenure[str(keys)])

            dict_commiter_info[str(keys)] = '|'.join([
                str(total_lines_of_code_added),
                str(avg_lines_of_code_added),
                str(total_lines_of_code_removed),
                str(avg_lines_of_code_removed),
                str(tenure_committer),
                str(total_num_committs)
            ])

    for line in data_compl.readlines()[1:]:
        line = line.strip().split('|')
        committer_name = str(line[4].replace(' ',
                                             '_').replace('-', '_').replace(
                                                 '__', '_').split('_(')[0])
        commit_id_code = str(line[0])
        new_line = line[8].split('+')[1].split(',')
        if len(new_line) > 1:
            start_line = int(new_line[0])
            end_line = start_line + int(new_line[1])
            year = int(line[9])
            if year == int(yearstr):
                dict_committer_codes_complexity[
                    commit_id_code + '|' +
                    committer_name] = line[11] + '|' + line[18] + '|' + line[19]

    for keys, values in dict_committer_codes_complexity.items():
        committer_name = keys.split('|')[1]
        MI = values.split('|')[0]
        HV = values.split('|')[1]
        CC = values.split('|')[2]

        dict_committer_code_MI[str(committer_name)].append(float(MI))
        dict_committer_code_HV[str(committer_name)].append(float(HV))
        dict_committer_code_CC[str(committer_name)].append(float(CC))

    dict_commiter_info_compl = defaultdict(list)

    for keys, values in dict_committer_code_MI.items():

        avg_MI_committer = np.mean(values)
        avg_HV_committer = np.mean(dict_committer_code_HV[str(keys)])
        avg_CC_committer = np.mean(dict_committer_code_CC[str(keys)])

        if str(keys) in dict_commiter_info:

            info = dict_commiter_info[str(keys)]
            dict_commiter_info_compl[str(
                keys)] = info + '|' + str(avg_MI_committer) + '|' + str(
                    avg_HV_committer) + '|' + str(avg_CC_committer)

    for nodes in G.nodes():
        if str(G.node[nodes]['label']) in dict_commiter_info_compl:

            info = dict_commiter_info_compl[str(G.node[nodes]['label'])]

            lines_of_code_added_sum = info.split('|')[0]
            lines_of_code_added_avg = info.split('|')[1]
            lines_of_code_removed_sum = info.split('|')[2]
            lines_of_code_removed_mean = info.split('|')[3]
            tenure_committer = info.split('|')[4]
            total_num_committs = info.split('|')[5]

            MI = info.split('|')[6]
            HV = info.split('|')[7]
            CC = info.split('|')[8]

            G.node[nodes]['lines_of_code_added_sum'] = float(
                lines_of_code_added_sum)
            G.node[nodes]['lines_of_code_added_avg'] = float(
                lines_of_code_added_avg)
            G.node[nodes]['lines_of_code_removed_sum'] = float(
                lines_of_code_removed_sum)
            G.node[nodes]['lines_of_code_removed_mean'] = float(
                lines_of_code_removed_mean)
            G.node[nodes]['tenure_committer'] = float(tenure_committer)
            G.node[nodes]['total_num_committs'] = float(total_num_committs)
            G.node[nodes]['avg_MI_committer'] = float(MI)
            G.node[nodes]['avg_HV_committer'] = float(HV)
            G.node[nodes]['avg_CC_committer'] = float(CC)
    ### triangles ###

    for node, val in nx.triangles(G).items():
        G.node[node]['triangle'] = float(val)

    ### local clustering coefficient ###
    weightlists = [
        'wt_n_com_code', 'mean_spatial_inter', 'std_spatial_inter',
        'median_spatial_inter', 'diff_90_10_spatial',
        'wt_hm_diff_first_commit_time', 'wt_hsum_diff_first_commit_time',
        'wt_hm_diff_last_commit_time', 'wt_hsum_diff_last_commit_time',
        'wt_mean_joint_commit', 'wt_sum_joint_commit',
        'wt_mu_inter_commit_time', 'wt_std_inter_commit_time'
    ]
    for wtl in weightlists:
        for node, val in nx.clustering(G, weight=str(wtl)).items():
            G.node[node]['LCC' + '_' + str(wtl)] = float(val)


    nx_old.write_gml(G, str(gml1))
Example #11
def gen_visibility_matrix_dsm_adjacency(sourcedir, destdir, n1, n2):
    """
	generate the visibility matrix of DSM and evaluate the 
	propagation cost as defined by Baldwin
	"""

    globgml = glob.glob(sourcedir + 'nova-*.gml')

    outfile = open(
        destdir + 'commitID_propagation_costs__2012' + '__' + str(n1) + '__' +
        str(n2) + '.txt', 'w')

    for gml_file in globgml[int(n1):int(n2)]:
        commitid = gml_file.split('/')[-1][:-4].split('-')[1]

        G = nx_old.read_gml(gml_file)

        #### get the visibility matrix from DSM ###

        nodelist = G.nodes()
        num_descendants = []

        for n in nodelist:

            ## number of nodes reachable downstream from n
            desc = nx.descendants(G, n)
            num_descendants.append(len(list(desc)))

        power = max(num_descendants)

        adjacency_matrix = nx.to_numpy_matrix(
            G, weight=None)  ## Adjacency matrix of directed graph
        adjacency_matrix_wt = nx.to_numpy_matrix(
            G, weight='weight')  ## Weighted Adjacency matrix of directed graph

        M1 = adjacency_matrix

        ## Visibility matrix
        visibilityM = 0
        visibilityM_w = 0
        for k in range(power):

            visibilityM += M1**int(k)
            visibilityM_w += adjacency_matrix_wt**int(k)

        ## Propagation cost: sum of all visibility-matrix entries,
        ## normalized by the squared number of nodes
        N = len(G.nodes())
        propagation_cost_dsm = np.sum(visibilityM) * 1.0 / (N ** 2)
        propagation_cost_dsm_wc = np.sum(visibilityM_w) * 1.0 / (N ** 2)

        print >> outfile, '%s|%s|%s' % (commitid, propagation_cost_dsm,
                                        propagation_cost_dsm_wc)
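## A worked sketch of the propagation-cost idea used above (following Baldwin),
## on an assumed 3-node chain a -> b -> c: with V = sum_k A^k the visibility
## matrix, the propagation cost is read here as the density of nonzero entries
## of V, i.e. the fraction of (i, j) pairs where a change can propagate.
def _propagation_cost_demo():
    D = nx.DiGraph()
    D.add_edges_from([('a', 'b'), ('b', 'c')])
    A = nx.to_numpy_matrix(D, weight=None)
    V = np.zeros(A.shape)
    for k in range(len(D) + 1):  ## k = 0 .. N covers every possible path length
        V = V + np.linalg.matrix_power(A, k)
    ## 6 visible pairs (3 diagonal + a->b, b->c, a->c) out of 3**2 = 0.667
    return float((np.asarray(V) > 0).sum()) / (len(D) ** 2)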
def plot_network_nx(graphname, iter1, sizeres, edgewidth):
	from networkx.drawing.nx_agraph import graphviz_layout
	G = nx_old.read_gml(graphname)
	
	fig = plt.figure()
	ax = fig.add_subplot(111)    

	pos2 = nx.spring_layout(G,iterations=int(iter1))
	#pos2 = graphviz_layout(G, prog='neato')

	nodelist_firm = []; nodelist_ind = []; nodelist_distr = []
	nodelist_firm_clr = []; nodelist_ind_clr = []; nodelist_distr_clr = []
	nodelist_firm_sz = []; nodelist_ind_sz = []; nodelist_distr_sz = []
	nodekshell = []; nodesize = []; nodelist = []; nodesattr = []; colorf = []; colord = []; colori = []

	for n in G.nodes() :
		nodekshell.append(int(G.node[n]['kshell_index']))
		nodesize.append(100*(1+np.log(float(G.node[n]['degree']))))
		nodelist.append(int(G.node[n]['id']))
		nodesattr.append((G.node[n]['id'], int(sizeres)*(np.log10(1+float(G.node[n]['degree']))), G.node[n]['kshell_index'], G.node[n]['node_type']))

	print min(nodekshell), max(nodekshell), nodekshell


	for (ind, sz, clr, type1) in nodesattr :
		if str(type1) == "distributor" :
			nodelist_distr.append(ind)
			nodelist_distr_sz.append(sz)
			nodelist_distr_clr.append(float(clr-min(nodekshell))*1.0/float(max(nodekshell) - min(nodekshell)))

		if str(type1) == "independent" :
			nodelist_ind.append(ind)
			nodelist_ind_sz.append(sz)
			nodelist_ind_clr.append(float(clr-min(nodekshell))*1.0/float(max(nodekshell) - min(nodekshell)))

		if str(type1) == "firm" :
			nodelist_firm.append(ind)
			nodelist_firm_sz.append(sz)
			nodelist_firm_clr.append(float(clr-min(nodekshell))*1.0/float(max(nodekshell) - min(nodekshell)))
	for colors in nodelist_firm_clr :
		colorf.append(plt.cm.jet(colors))
	for colors in nodelist_distr_clr :
		colord.append(plt.cm.jet(colors))
	for colors in nodelist_ind_clr :
		colori.append(plt.cm.jet(colors))


	nx.draw_networkx_nodes(G,pos2,nodelist=nodelist_firm, node_size = nodelist_firm_sz, node_shape = 'o', node_color=colorf, cmap=plt.cm.jet, linewidths=None)
	nx.draw_networkx_nodes(G,pos2,nodelist=nodelist_ind, node_size = nodelist_ind_sz, node_shape = 'o', node_color=colori, cmap=plt.cm.jet, linewidths=None)
	nx.draw_networkx_nodes(G,pos2,nodelist=nodelist_distr, node_size = nodelist_distr_sz, node_shape = 'o', node_color=colord, cmap=plt.cm.jet, linewidths=None)

	nx.draw_networkx_edges(G,pos2,edgelist=None,width=int(edgewidth), edge_color='k',style='solid',alpha=0.35)
	
	plt.axis('off')
	plt.show()
def null_model_shuffle_edge_weights(gml1, year, nsim):

    ## Here we shuffle the weights of the (temporal and spatial) edges: two edges
    ## (u1, v1) with weight w1 and (u2, v2) with weight w2 may swap weights,
    ## yielding e.g. (u1, v1, w2) and (u2, v2, w1). We then recompute node
    ## strengths from the shuffled spatial and temporal edge weights.

    ## destination folder for the null networks ##

    destdir = '../../Idea03_06/Data/Null/networks_from_edge_val_shuffle/' + str(
        year) + '/'
    for i in range(0, int(nsim) + 1, 1):
        G = nx_old.read_gml(gml1)

        ## generate a null network and add the links from the observed network
        G_null = nx.Graph()

        spatial_list_edge = []
        temporal_list_edge = []
        edge_list = []

        for (u, v, d) in G.edges(data=True):
            spatial_list_edge.append(d['mean_spatial_inter'])
            temporal_list_edge.append(d['wt_mu_inter_commit_time'])
            edge_list.append((G.node[u]['label'], G.node[v]['label']))

        shuffle(spatial_list_edge, random)
        shuffle(temporal_list_edge, random)


        ## reattach the shuffled weights to the fixed edge list
        z = zip(edge_list, spatial_list_edge, temporal_list_edge)

        G_null.add_weighted_edges_from([(z1[0], z1[1], z2)
                                        for (z1, z2, z3) in z],
                                       weight="mean_spatial_inter")
        G_null.add_weighted_edges_from([(z1[0], z1[1], z3)
                                        for (z1, z2, z3) in z],
                                       weight="wt_mu_inter_commit_time")

        weightlists = ["mean_spatial_inter", "wt_mu_inter_commit_time"]

        for wtl in weightlists:
            for u, v in nx.degree(G_null, weight=str(wtl)).items():
                G_null.node[u]['s' + '_' + str(wtl)] = float(v)

        dict_id_name = defaultdict(list)

        for n in G.nodes():

            committer_name = str(G.node[n]['label'])
            lines_of_code_added_sum = str(G.node[n]['lines_of_code_added_sum'])
            tenure_committer = str(G.node[n]['tenure_committer'])
            avg_MI_committer = str(G.node[n]['avg_MI_committer'])

            dict_id_name[
                committer_name] = lines_of_code_added_sum + '|' + tenure_committer + '|' + avg_MI_committer

        for n in G_null.nodes():
            if str(n) in dict_id_name:
                vals = dict_id_name[str(n)]

                G_null.node[n]['label'] = str(n)
                G_null.node[n]['lines_of_code_added_sum'] = float(
                    vals.split('|')[0])
                G_null.node[n]['tenure_committer'] = float(vals.split('|')[1])
                G_null.node[n]['avg_MI_committer'] = float(vals.split('|')[2])

        gmlw = destdir + 'sim__' + str(i) + '.gml'

        nx_old.write_gml(G_null, str(gmlw))
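## A minimal check with toy data (not the pipeline above) of what this null
## model preserves: shuffling weights over a fixed edge list keeps the topology,
## every node degree and the multiset of weights, while node strengths
## (weighted degrees) are randomized. Assumes shuffle is random.shuffle.
def _weight_shuffle_demo():
    T = nx.Graph()
    T.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 5.0), ('c', 'a', 9.0)],
                              weight='mean_spatial_inter')
    edges = T.edges()
    weights = [T[u][v]['mean_spatial_inter'] for (u, v) in edges]
    shuffle(weights)
    T_null = nx.Graph()
    T_null.add_weighted_edges_from(
        [(u, v, w) for ((u, v), w) in zip(edges, weights)],
        weight='mean_spatial_inter')
    print nx.degree(T)  ## unchanged: every node has degree 2
    print nx.degree(T_null, weight='mean_spatial_inter')  ## strengths reshuffled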