def run(self, data, seed=None):
        """Cluster ``data`` with SNAP's Clauset-Newman-Moore algorithm.

        Directed graphs are rejected. ``seed`` is accepted but not used; an
        info message is logged when one is supplied.

        The result dict (timecost, runname, dataname, meta, modularity,
        clusters) is handed to ``save_result`` and stored on ``self.result``.

        Returns:
            self

        Raises:
            UnsupportedException: if ``data.is_directed()`` is true.
        """
        if data.is_directed():
            raise UnsupportedException("only undirected graph is supported")
        if seed is not None:
            self.logger.info("seed ignored")

        snap_graph = convert.to_snap(data)
        community_vec = snap.TCnComV()
        # utils.timeit yields the elapsed time together with the callable's
        # return value (the modularity).
        timecost, modularity = utils.timeit(
            lambda: snap.CommunityCNM(snap_graph, community_vec))

        # Key every community by its position in the SNAP result vector.
        clusters = {
            index: [node_id for node_id in community]
            for index, community in enumerate(community_vec)
        }

        self.logger.info(
            "Made %d clusters in %f seconds. modularity of the graph is %f" %
            (len(clusters), timecost, modularity))

        result = {
            'timecost': timecost,
            'runname': self.name,
            'dataname': data.name,
            'meta': self.get_meta(),
            'modularity': modularity,
            'clusters': clusters,
        }

        save_result(result)
        self.result = result
        return self
Пример #2
0
def analizzaClausetNewmanMoore(pfPaj, pfAINN, pfMod):
    """Detect CNM communities in a Pajek graph and write them per author.

    Args:
        pfPaj: path of the graph file, loaded with ``snap.LoadPajek``.
        pfAINN: path of a tab-separated file whose lines hold three fields
            (id, number, name); the number is looked up as a node id.
        pfMod: output path; one "id, name, community" line (tab-separated,
            CRLF-terminated) is written per distinct number read from
            ``pfAINN``.

    Returns:
        The number of communities found.
    """
    graph = snap.LoadPajek(snap.PUNGraph, pfPaj)

    communities = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, communities)

    # node number -> community index
    node_to_class = {}
    n_classes = 0
    for members in communities:
        for node in members:
            node_to_class[node] = n_classes
        n_classes += 1
    print('Numero di comunita analizzaClausetNewmanMoore: {} modularity: {}'.
          format(n_classes, modularity))

    # author number -> [author id, author name]
    authors = {}
    with open(pfAINN, 'rb') as author_file:
        for raw_line in author_file:
            author_id, author_num, author_name = raw_line.rstrip().split('\t')
            authors[int(author_num)] = [author_id, author_name]

    with open(pfMod, 'wb') as out_file:
        for author_num in authors:
            author_id, author_name = authors[author_num]
            out_file.write('{}\t{}\t{}\r\n'.format(
                author_id, author_name, node_to_class[author_num]))

    return n_classes  # number of communities found
Пример #3
0
def community_partition(G):
    """Map every node of ``G`` to the index of its CNM community.

    Returns:
        A ``snap.TIntIntH`` keyed by node id; each value is the position of
        the node's community in the vector filled by ``snap.CommunityCNM``.
    """
    detected = snap.TCnComV()
    snap.CommunityCNM(G, detected)  # fills ``detected``; modularity unused

    node_to_partition = snap.TIntIntH()
    for index, members in enumerate(detected):
        for node_id in members:
            node_to_partition[node_id] = index
    return node_to_partition
Пример #4
0
def comDetect(algorithm, clusterCommands, Graph, conn, cur):
    """Run community detection on ``Graph`` and store the communities.

    Args:
        algorithm: ``"gn"`` (Girvan-Newman) or ``"cnm"``
            (Clauset-Newman-Moore).
        clusterCommands: forwarded unchanged to ``createTable``.
        Graph: graph handed to the SNAP community routine.
        conn: forwarded unchanged to ``createTable``.
        cur: forwarded unchanged to ``createTable``.

    Raises:
        ValueError: if ``algorithm`` is not one of the supported names.
    """
    # Reject unknown algorithms before doing any work. Previously the call
    # fell through, stored an empty community vector via createTable and
    # then died with a NameError on the unbound ``modularity``.
    if algorithm == "gn":
        detect = snap.CommunityGirvanNewman
    elif algorithm == 'cnm':
        detect = snap.CommunityCNM
    else:
        raise ValueError(
            "unsupported community detection algorithm: %r" % (algorithm,))

    CmtyV = snap.TCnComV()
    before_time = time.time()
    modularity = detect(Graph, CmtyV)
    # Single-argument print calls produce the same text on Python 2 and 3.
    print("Total handling time is:  %s" % (time.time() - before_time,))
    createTable(clusterCommands, CmtyV, conn, cur)
    print("The modularity of the network is %f" % modularity)
Пример #5
0
def getCnn():
    """Print the CNM communities of the word graph and its modularity.

    The graph is the first of the six values returned by
    ``generate_word_graph(True, False, False)``; the others are not used.
    """
    word_graph, _id2, _synset2, _, _, _ = generate_word_graph(
        True, False, False)
    print(word_graph.GetNodes())

    communities = snap.TCnComV()
    modularity = snap.CommunityCNM(word_graph, communities)
    for members in communities:
        print("Community: ")
        for node_id in members:
            print(node_id)
    print("The modularity of the network is %f" % modularity)
Пример #6
0
    def run(self):
        """Detect CNM communities on ``self.graph``.

        ``snap.DelSelfEdges`` is applied to ``self.graph`` first. The result
        is stored in ``self.community_list`` as a list of node-id lists, one
        per community.
        """
        snap.DelSelfEdges(self.graph)
        detected = snap.TCnComV()
        snap.CommunityCNM(self.graph, detected)

        self.community_list = [
            [node for node in members] for members in detected
        ]
Пример #7
0
def split_communities(C_Net):
    """Split ``C_Net`` into CNM communities.

    Prints the number of communities found.

    Returns:
        A list with one ``snap.TIntV`` of node ids per community.
    """
    CmtyV = snap.TCnComV()
    snap.CommunityCNM(C_Net, CmtyV)  # modularity return value is not needed
    print(len(CmtyV))  # number of communities
    Cs = []
    for Cmty in CmtyV:
        NIdV = snap.TIntV()
        for NI in Cmty:
            # SNAP vectors append with ``Add``; the lowercase ``add`` used
            # previously is not part of the documented TIntV API.
            NIdV.Add(NI)
        Cs.append(NIdV)
    return Cs
Пример #8
0
def community_cnm(G):
    """Return the CNM communities of ``G`` as a list of node-id lists."""
    detected = snap.TCnComV()
    snap.CommunityCNM(G, detected)  # modularity return value is not needed
    return [[node_id for node_id in members] for members in detected]
def getComms(graph):
    """Detect CNM communities and index every node by community.

    Prints the modularity, and a warning for any node that shows up in more
    than one community (the first assignment is kept).

    Returns:
        ``(comms, commDict)``: the filled ``snap.TCnComV`` and a dict mapping
        node id to the index of its community in ``comms``.
    """
    comms = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, comms)
    # Single-argument print calls produce the same text on Python 2 and 3.
    print('Modularity %s' % (modularity,))
    commDict = {}
    for index, community in enumerate(comms):
        for node_id in community:
            if node_id in commDict:
                # This message used to be a bare string expression, i.e. a
                # no-op; actually report the anomaly.
                print('node in more than one comm? %s' % (node_id,))
            else:
                commDict[node_id] = index
    return comms, commDict
def get_community_CNM(file_path, output_path):
    """Write a CNM community summary for the graph at ``file_path``.

    The report written to ``output_path`` holds the modularity, the number
    of communities, and then the size of each community on its own line.
    """
    loaded_graph, _ = load_graph(file_path)
    undirected = convert_to_undirected(loaded_graph)

    communities = snap.TCnComV()
    modularity = snap.CommunityCNM(undirected, communities)

    report = [
        'Modularity: ', str(modularity),
        '\nNum of communities: ', str(len(communities)),
        '\nCommunities:\n',
    ]
    for members in communities:
        report.append(str(len(members)) + '\n')

    with open(output_path, 'w') as f:
        f.write(''.join(report))
Пример #11
0
def get_communities(G_Undir, chords_dict):
    """Print every CNM community of ``G_Undir`` using ``chords_dict`` labels.

    ``snap.DelSelfEdges`` is applied to ``G_Undir`` before detection. For
    each community its size is printed, then ``chords_dict[node_id]`` for
    every member, then two blank lines; the modularity is printed last.
    """
    print("************")
    print("Communities")
    snap.DelSelfEdges(G_Undir)

    detected = snap.TCnComV()
    modularity = snap.CommunityCNM(G_Undir, detected)
    for members in detected:
        print("Community: size %s" % (members.Len(),))
        for node_id in members:
            print(chords_dict[node_id])
        print("")
        print("")
    print("The modularity of the network is %f" % modularity)
    def detectCommunities(self,
                          algo="CNM",
                          snapgraph=None,
                          H=None,
                          nxGraph=None,
                          write=True,
                          printout=True):
        '''
        Detect communities with the Clauset-Newman-Moore greedy modularity
        algorithm or the Girvan-Newman betweenness-centrality algorithm,
        label the NetworkX graph with the result and export it as JSON.

        Parameters:
            algo: "CNM" selects Clauset-Newman-Moore; any other value
                selects Girvan-Newman.
            snapgraph: SNAP graph to analyse. When None, one is built with
                ``self.buildSnapGraph(networkxGraph=nxGraph)``.
            H: object exposing ``.Graph`` (the NetworkX graph to label) and
                ``.exportGraph``. Required when ``snapgraph`` is given.
            nxGraph: forwarded to ``self.buildSnapGraph`` when no SNAP graph
                is supplied.
            write: when true, save the communities via ``self.writeTxt``.
            printout: when true, print the members of every community.

        Returns:
            dict mapping each node id to ``{'CNM_Label': label}``, where
            labels are 1-based community numbers. The same mapping is set
            as node attributes on ``H.Graph``.

        Raises:
            ValueError: if ``snapgraph`` is supplied without ``H``.
        '''
        # Create a SNAP graph object (and its wrapper) if none is provided.
        if snapgraph is None:
            snapgraph, H = self.buildSnapGraph(networkxGraph=nxGraph)
        if H is None:
            # Without this check the failure was an opaque AttributeError
            # on ``None.Graph`` a few lines further down.
            raise ValueError("H must be provided together with snapgraph")
        networkxGraph = H.Graph

        # Detect communities and compute modularity. The elapsed time is
        # taken right after detection so that it does not include the time
        # spent printing the communities.
        start = time.time()
        CmtyV = snap.TCnComV()
        if algo == "CNM":
            modularity = snap.CommunityCNM(snapgraph, CmtyV)
        else:
            modularity = snap.CommunityGirvanNewman(snapgraph, CmtyV)
        elapsed = time.time() - start

        if printout:
            for i, Cmty in enumerate(CmtyV, 1):
                print(f"Community {i}: ")
                print(list(Cmty))
                print("\n")
        print("The modularity of the network is %f" % modularity)
        print(f"Time : {elapsed} seconds")

        # Save to a text file if requested.
        if write:
            self.writeTxt(CmtyV, modularity)

        # Build (node id, 1-based community label) rows. The label column is
        # named 'CNM_Label' regardless of the algorithm that was selected.
        communities = [(c, i) for i, cmty in enumerate(CmtyV, 1) for c in cmty]
        community_df = pd.DataFrame(communities,
                                    columns=['hashtag_id', 'CNM_Label'])
        labels = community_df.set_index('hashtag_id').to_dict('index')

        # Attach the labels to the NetworkX graph and export it as JSON.
        nx.set_node_attributes(networkxGraph, labels)
        print("Community labelled graph exported as ", self.name_arg, ".json")
        H.exportGraph("JSON")
        return labels
Пример #13
0
def comunityDetect(graph):
    """Return ``(communities, modularity)`` from CNM detection on ``graph``.

    ``communities`` is a list holding one list of node ids per community.
    """
    detected = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, detected)
    communities = [[node_id for node_id in members] for members in detected]
    return communities, modularity
Пример #14
0
def CNM_Graph(G1):
    """Run CNM on ``G1`` and summarise the communities.

    Returns:
        ``(sizes, communities, modularity, count)`` where ``communities`` is
        a list of node-id lists, ``sizes`` holds their lengths in the same
        order and ``count`` is the number of communities.
    """
    detected = snap.TCnComV()
    modularity = snap.CommunityCNM(G1, detected)

    communities = [[node_id for node_id in members] for members in detected]
    sizes = [len(members) for members in communities]
    return sizes, communities, modularity, len(communities)
def community_detection(input, output):
    """Detect CNM communities of a saved SNAP graph and write them out.

    Args:
        input: path of a file readable by ``snap.TNGraph.Load``.
        output: path of the text file to write; each line is the ``repr`` of
            one community's list of node ids.
    """
    print("Loading graph...")
    stream = snap.TFIn(input)
    directed = snap.TNGraph.Load(stream)

    # CNM is run on an undirected copy of the loaded graph.
    undirected = snap.ConvertGraph(snap.PUNGraph, directed)

    print("Performing community detection...")
    detected = snap.TCnComV()
    modularity = snap.CommunityCNM(undirected, detected)
    print("Modularity:", modularity)

    with open(output, "w") as out_file:
        for members in detected:
            out_file.write(repr(list(members)))
            out_file.write("\n")
Пример #16
0
def out_modularity_cnm(
        g, out_path='H:/social_network_analysis/Email-Enron-result.txt'):
    """Write the communities detected by the CNM method to a file.

    Each community becomes a list whose first element is the community
    index, followed by its node ids. These rows, the destination path and a
    modularity summary string are handed to ``file_op.write_file_li``.

    Args:
        g: graph passed to ``snap.CommunityCNM``.
        out_path: destination file. Defaults to the location that used to be
            hard-coded, so existing callers are unaffected.
    """
    community_vector = snap.TCnComV()
    modularity = snap.CommunityCNM(g, community_vector)
    s = 'The modularity of the network is ' + str(modularity)
    com_set = [
        [index] + [node_id for node_id in community]
        for index, community in enumerate(community_vector)
    ]
    file_op.write_file_li(com_set, out_path, s)
Пример #17
0
def labelCNMCommunity(graph):
    """Label every node of ``graph`` with its CNM community number.

    Communities are numbered from 1 in the order SNAP returns them. The
    member of a single-node community gets the label ``0.0`` instead of its
    community number (the number is still consumed).

    Returns:
        dict mapping node id to label.
    """
    detected = snap.TCnComV()
    snap.CommunityCNM(graph, detected)  # modularity is not used

    labels = {}
    for number, members in enumerate(detected, 1):
        label = 0.0 if members.Len() == 1 else number
        for node_id in members:
            labels[node_id] = label
    return labels
def same_community(G, n1, n2, method="CNM"):
    """Tell whether ``n1`` and ``n2`` share a community, ignoring their edge.

    If the edge (n1, n2) exists it is removed before detection and added
    back afterwards, including when detection raises.

    Args:
        G: SNAP graph; temporarily modified as described above.
        n1: first node id.
        n2: second node id.
        method: "CNM" or "GN". Any other value runs no detection, so the
            result is 0.

    Returns:
        1 if some detected community contains both nodes, otherwise 0.
    """
    deleted = False
    if G.IsEdge(n1, n2):
        G.DelEdge(n1, n2)
        deleted = True

    try:
        CmtyV = snap.TCnComV()
        if method == "CNM":
            snap.CommunityCNM(G, CmtyV)
        elif method == "GN":
            snap.CommunityGirvanNewman(G, CmtyV)

        for cmty in CmtyV:
            # Use the set for membership; it was previously built but the
            # test ran against the raw SNAP community object instead.
            members = set(cmty)
            if n1 in members and n2 in members:
                return 1
        return 0
    finally:
        # Restore the caller's graph on every exit path.
        if deleted:
            G.AddEdge(n1, n2)
Пример #19
0
    def build_graph_analytics(self):
        """
        Assemble the structures used for the graph analytics.

        Assigns consecutive integer ids to ``self.nodes``, builds an
        undirected SNAP graph from ``self.up_likes_articles`` and
        ``self.up_posts_to_articles``, runs CNM community detection on it
        and creates a NetworkX graph holding the same nodes.
        """
        # Two-way mapping between node objects and integer graph ids.
        for idx, node in enumerate(self.nodes):
            self.node_to_id[node] = idx
            self.id_to_node[idx] = node

        graph = snap.TUNGraph.New(len(self.node_to_id),
                                  len(self.up_likes_articles))
        self.g_article_posts = graph
        for node in self.nodes:
            graph.AddNode(self.node_to_id[node])
        for src, dst in self.up_likes_articles:
            graph.AddEdge(self.node_to_id[src], self.node_to_id[dst])
        for src, dst in self.up_posts_to_articles:
            graph.AddEdge(self.node_to_id[src], self.node_to_id[dst])

        self.cmtyv = snap.TCnComV()
        self.modularity = snap.CommunityCNM(graph, self.cmtyv)

        # Keep only communities with more than two members, first as graph
        # ids and then translated back through ``id_to_node``.
        self.sig_communities = [
            [node_id for node_id in community]
            for community in self.cmtyv
            if community.Len() > 2
        ]
        self.sig_communities_by_id = [
            [self.id_to_node[node_id] for node_id in members]
            for members in self.sig_communities
        ]

        self.user_post_graph_cnm = nx.Graph()
        self.user_post_graph_cnm.add_nodes_from(self.nodes)
Пример #20
0
def community_cnm_with_limit(G, limit_nodes):
    """Repeatedly split ``G`` with CNM until communities are small enough.

    CNM is first run on the whole graph. Every detected community with more
    than ``limit_nodes`` nodes is clustered again on the subgraph returned
    by ``generate_graph(G, nodes)``. A community that CNM does not split any
    further is accepted as-is even when it exceeds the limit; otherwise the
    same node set would be queued forever.

    Args:
        G: SNAP graph to partition.
        limit_nodes: largest community size accepted without re-splitting.

    Returns:
        List of node-id lists, one per final community.
    """
    finished_list = []
    unfinished_list = [[node.GetId() for node in G.Nodes()]]
    # Single-argument print calls produce the same text on Python 2 and 3.
    print("detection starts")
    sys.stdout.flush()
    while unfinished_list:
        list_nodes = unfinished_list.pop()
        # Only build a subgraph when working on a strict subset of G.
        if len(list_nodes) != G.GetNodes():
            TG = generate_graph(G, list_nodes)
        else:
            TG = G

        # Fresh result vector for every run.
        CmtyV = snap.TCnComV()
        snap.CommunityCNM(TG, CmtyV)

        for Cmty in CmtyV:
            result_nodes = [NI for NI in Cmty]
            too_big = len(result_nodes) > limit_nodes
            # A community as large as its input means CNM made no progress;
            # re-queueing it would loop forever.
            was_split = len(result_nodes) < len(list_nodes)
            if too_big and was_split:
                unfinished_list.append(result_nodes)
            else:
                finished_list.append(result_nodes)

        print("the size of finished_list : %d " % len(finished_list))
        sys.stdout.flush()
        print("the size of unfinished_list : %d " % len(unfinished_list))
        sys.stdout.flush()
        # Release temporary subgraphs, never the caller's graph.
        if TG != G:
            TG.Clr()
    print("detection end")
    sys.stdout.flush()
    return finished_list
Пример #21
0
def generate_graph(n_nodes=50, out_degree=None, seed=1):
    """
    Build a preferential-attachment (Barabasi) graph and report on it.

    The function prints the node with the maximum degree and the node with
    the highest PageRank score, then times two community detection runs on
    the graph: Girvan-Newman followed by Clauset-Newman-Moore.

    :param n_nodes: int. Number of nodes of the generated graph.
    :param out_degree: int. Out degree of each node. If None, a random
                            integer between 5 and 20 is drawn after seeding
                            ``random`` with ``seed``.
    :param seed: int. Seed used only when ``out_degree`` is None.
    :return: bool. True when both detections completed and the time elapsed
             since Girvan-Newman started is at most 10 minutes; False when
             it is longer or a MemoryError was raised.
    """
    if out_degree is None:
        random.seed(seed)
        out_degree = random.randint(5, 20)

    print("")
    print("Generating Graph with %s Nodes of Out Degree: %s " %
          (n_nodes, out_degree))

    graph = snap.GenPrefAttach(n_nodes, out_degree)

    # Report the degree of the node SNAP identifies as the max-degree node.
    top_degree_id = snap.GetMxDegNId(graph)
    for node in graph.Nodes():
        if node.GetId() == top_degree_id:
            print("Node: %d, Maximum Degree %d" %
                  (node.GetId(), node.GetDeg()))

    # PageRank of every node; -1 is the starting value for both the best
    # id and the best score.
    best_id, best_score = -1, -1
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores)
    for node_id in scores:
        if scores[node_id] > best_score:
            best_score = scores[node_id]
            best_id = node_id

    print("")
    print("Node with the Highest PageRank value: ")
    print("Node: %s, PageRank value %s " % (best_id, best_score))
    print("")

    time_limit = 10 * 60  # seconds
    try:
        # First detection: Girvan-Newman.
        gn_start = time.time()
        gn_communities = snap.TCnComV()
        snap.CommunityGirvanNewman(graph, gn_communities)
        print('Girvan-Newman community Detection Algorithm: '
              'Execution Time:  %s' % (time.time() - gn_start,))

        # Second detection: Clauset-Newman-Moore.
        cnm_start = time.time()
        cnm_communities = snap.TCnComV()
        snap.CommunityCNM(graph, cnm_communities)
        print('Clauset-Newman-Moore community Detection Algorithm: '
              'Execution Time:  %s' % (time.time() - cnm_start,))

        print('-' * 100)
        print('-' * 100)

        # The limit applies to the total time of both detections; False
        # tells a calling loop to stop growing the graph.
        return not (time.time() - gn_start > time_limit)
    except MemoryError:
        # Running out of memory is treated like exceeding the time limit.
        return False
Пример #22
0
# Script: label the hashtag graph with Clauset-Newman-Moore communities.
# Load the raw hashtag CSV into the HashtagGraph wrapper.
H = HashtagGraph(name="ClausetNewmanMoore", ret_maximal_subgraph=True)
H.load_raw_csv("data/Hashtag_Raw_Data/COVID19_hashtags7.csv")

networkxGraph = H.Graph

# Mirror the NetworkX graph into an undirected SNAP graph.
# NOTE(review): presumably the NetworkX node ids are integers, as they are
# passed straight to AddNode/AddEdge - confirm.
G = snap.TUNGraph.New()

for n in list(networkxGraph.nodes):
    G.AddNode(n)
for src, trgt in list(networkxGraph.edges):
    G.AddEdge(src, trgt)

# Run CNM; the timer also covers creation of the result vector.
start = time.time()
CommunityVec = snap.TCnComV()
modularity = snap.CommunityCNM(G, CommunityVec)

print("The modularity of the network is %f" % modularity)
print(f"Time : {time.time()-start} seconds")

# One row per node: (node id, 1-based community label, degree in the
# NetworkX graph).
communities = []
for i, cmty in enumerate(CommunityVec):
    for c in cmty:
        communities.append((c, i+1, networkxGraph.degree[c]))

community_df = pd.DataFrame(communities, columns=['hashtag_id', 'CNM_Label', 'Degree'])

# Store 'CNM_Label' and 'Degree' as node attributes, keyed by node id.
nx.set_node_attributes(networkxGraph, community_df.set_index('hashtag_id').to_dict('index'))

for i, j in networkxGraph.edges:
    if networkxGraph.nodes[i]['CNM_Label'] == networkxGraph.nodes[j]['CNM_Label']:
Пример #23
0
# Script (Python 2): detect CNM communities on the folded post-id graph and
# persist the resulting community vector.
f_in = snap.TFIn(FOLDED_POSTID_GRAPH_PATH)
post_graph = snap.TUNGraph.Load(f_in)
print "nodes", post_graph.GetNodes()
print "edges", post_graph.GetEdges()

# Output locations, both under BASE_PATH.
COMMUNITIES_PATH = path.join(BASE_PATH,
                             'postid-communities-with-postbodies.txt')
COMMUNITIES_VEC_PATH = path.join(BASE_PATH, 'postid-communities.vector')

# remove degree-1 nodes
# The graph must be free of self-loops at this point (asserted below).
assert snap.CntSelfEdges(post_graph) == 0
snap.DelDegKNodes(post_graph, 1, 1)

comm_vec = snap.TCnComV()
modularity = snap.CommunityCNM(post_graph, comm_vec)

# Save the community vector to disk ...
f_out = snap.TFOut(COMMUNITIES_VEC_PATH)
comm_vec.Save(f_out)
f_out.Flush()

# ... and read it straight back into a fresh vector.
f_in = snap.TFIn(COMMUNITIES_VEC_PATH)
comm_vec = snap.TCnComV()
comm_vec.Load(f_in)

print "communities", len(comm_vec)

# NOTE(review): pickle.load can execute arbitrary code; only use it on a
# trusted file. The handle is not closed in the code visible here.
pickle_file = open(POSTID_PICKLE, 'rb')
postid_dict = pickle.load(pickle_file)

# Counter-style dict: missing keys start at 0.
community_dict = collections.defaultdict(int)
Пример #24
0
# Script: build an undirected SNAP graph from a node-id file (sys.argv[1],
# one integer id per line) and an edge-list file (sys.argv[2], two
# whitespace-separated integer ids per line, '#' starts a comment line),
# then print every CNM community as a space-separated line of node ids.
graph = snap.TUNGraph.New()

# ``with`` guarantees both input files are closed, also on a parse error.
with open(sys.argv[1]) as nodes_file:
    for line in nodes_file:
        node_id = line.strip()
        if not node_id:
            continue  # tolerate blank lines instead of failing in int('')
        graph.AddNode(int(node_id))

comment_symbol = "#"
edges = 0
with open(sys.argv[2]) as edges_file:
    for line in edges_file:
        if line.startswith(comment_symbol):
            continue
        nodes = line.split()
        if not nodes:
            continue  # tolerate blank lines instead of raising IndexError
        graph.AddEdge(int(nodes[0]), int(nodes[1]))
        edges += 1

communities = snap.TCnComV()

modularity = snap.CommunityCNM(graph, communities)

# Unused in the code visible here; kept because it is a module-level name.
string = ""

for com in communities:
    # Single-argument print call: same output on Python 2 and 3.
    print(" ".join([str(node) for node in com]))

Пример #25
0
def get_communities(G):
    """Return ``(communities, modularity)`` for CNM detection on ``G``.

    ``communities`` is a list with one list of node ids per community.
    """
    detected = snap.TCnComV()
    modularity = snap.CommunityCNM(G, detected)

    communities = []
    for members in detected:
        communities.append([node_id for node_id in members])
    return communities, modularity
Пример #26
0
def main():
    """Score candidate answerers by question-community overlap.

    Steps:
      1. Build an undirected SNAP graph whose nodes are the questions in
         ``dataset/Posts.xml`` (``PostTypeId == '1'`` with an owner) and
         connect every pair of questions that share a tag.
      2. Detect question communities with CNM.
      3. Collect, for every user in ``dataset/Users.xml``, the parent
         questions of the answers they posted before 2016.
      4. For each of the first 100 entries of ``get_questionsid_list()``,
         add 1.0 to a candidate user's score for every answered question
         that lies in the same community as that entry.
      5. Pass the scores to ``check_answer_range``.
    """
    # Posts.xml is parsed once and reused for the answers pass below; it
    # used to be parsed a second time.
    post_rows = et.parse(
        current_dir + '/dataset/Posts.xml').getroot().findall('row')

    graph = snap.PUNGraph.New()
    tag_posts_dict = {}  # tag -> list of question ids (as strings)
    for row in post_rows:
        if row.get('PostTypeId') == '1' and row.get('OwnerUserId') is not None:
            post_id = row.get('Id')
            graph.AddNode(int(post_id))
            tags = row.get('Tags')
            if tags is not None:
                # Tags look like '<a><b>'; splitting on '><' leaves the
                # outer brackets, which the substitution removes.
                for tag in tags.split('><'):
                    tag = re.sub('[<>]', '', tag)
                    tag_posts_dict.setdefault(tag, []).append(post_id)

    # ``values()`` exists on both Python 2 and 3 (``iteritems`` is 2-only).
    for posts in tag_posts_dict.values():
        for src_post in posts:
            for dest_post in posts:
                if src_post != dest_post and not graph.IsEdge(
                        int(src_post), int(dest_post)):
                    graph.AddEdge(int(src_post), int(dest_post))

    # Communities of questions, stored as sets of string ids so that the
    # membership tests in the scoring loop are O(1).
    CmtyV = snap.TCnComV()
    snap.CommunityCNM(graph, CmtyV)
    communities = [set(str(NI) for NI in Cmty) for Cmty in CmtyV]

    questions_list = get_questionsid_list()

    users_root = et.parse(current_dir + '/dataset/Users.xml').getroot()
    users_questions_dict = {}
    for row in users_root.findall('row'):
        users_questions_dict[row.get('Id')] = []

    for row in post_rows:
        # The year is compared as a string, which is safe for 4-digit years.
        if (row.get('CreationDate').split('-')[0] < '2016'
                and row.get('PostTypeId') == '2'
                and row.get('OwnerUserId') is not None):
            users_questions_dict[row.get('OwnerUserId')].append(
                row.get('ParentId'))

    randomusersid_list_list = get_randomusersid_list_list()

    user_probability_list_dict = []
    for idx in range(100):
        user_probability_list_dict.append(
            {user: 0 for user in randomusersid_list_list[idx]})

    for idx in range(100):
        question = questions_list[idx]
        scores = user_probability_list_dict[idx]
        for user in randomusersid_list_list[idx]:
            for user_question in users_questions_dict[user]:
                for community in communities:
                    if question in community and user_question in community:
                        scores[user] += 1.0

    check_answer_range(user_probability_list_dict)
Пример #27
0
def main():
    """Run CNM on the social network and plot category purity per cluster.

    Reads ``../data/nodes.csv``, loads the graph from
    ``../data/youtube.graph`` when that file exists (otherwise builds it from
    ``../data/edges.csv`` via ``data2dag``), detects CNM communities on an
    undirected, self-loop-free copy, prints summary figures and saves the
    plot of the top category proportion per cluster to
    ``../figures/category_top_clusters.png``.
    """
    # Node table: remember the column names, then work on a plain array.
    node_table = pd.read_csv("../data/nodes.csv", sep='\t', index_col=0)
    headers = list(node_table.columns)
    nodes = np.asarray(node_table)
    n_nodes = nodes.shape[0]

    # Prefer the saved binary graph; fall back to the edge list.
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        edge_table = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
        social_network = data2dag(np.asarray(edge_table).astype(int), n_nodes)

    # Report self-loops.
    for edge in social_network.Edges():
        if edge.GetSrcNId() == edge.GetDstNId():
            print("Self Loop Found:", edge.GetSrcNId())

    # CNM from snap.py, timed including the undirected conversion.
    print("Computing CNM")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.DelSelfEdges(undirected)
    the_modularity = snap.CommunityCNM(undirected, CmtyV)
    stop = timeit.default_timer()

    # Per-node community index and per-community size. Nodes that appear in
    # no community keep the initial value 0.
    n_cmtys = len(CmtyV)
    node_to_cmty = np.zeros(n_nodes)
    cmty_sizes = np.zeros(n_cmtys)
    for idx, cmty in enumerate(CmtyV):
        for node in cmty:
            node_to_cmty[node] = idx
        cmty_sizes[idx] = len(cmty)
    # Not used further in this function.
    cmtys = [[node for node in cmty] for cmty in CmtyV]

    print("Results from Clauset-Newman-Moore:")
    print("Number of clusters:", n_cmtys)
    print("Time elapsed:", stop - start)

    # Index the distinct values of the 'category' column.
    upload_col = headers.index('category')
    categories = {nodes[i][upload_col] for i in range(n_nodes)}
    idx_to_categories = list(categories)
    print("Number of categories:", len(idx_to_categories))
    categories_to_idx = {
        category: idx for idx, category in enumerate(idx_to_categories)
    }

    # Count categories per community, then normalise each row by the
    # community size to obtain proportions.
    cmty_category_count = np.zeros((n_cmtys, len(idx_to_categories)))
    for i in range(n_nodes):
        row = int(node_to_cmty[i])
        col = categories_to_idx[nodes[i][upload_col]]
        cmty_category_count[row, col] += 1
    cmty_category_count = cmty_category_count / cmty_sizes[:, np.newaxis]

    # Largest category share of each cluster, sorted in descending order,
    # against a dashed 0.5 line.
    plt.figure()
    plt.plot(sorted(np.max(cmty_category_count, axis=1), reverse=True),
             label="Top proportion")
    plt.plot(0.5 * np.ones(cmty_category_count.shape[0]),
             label="Majority Threshold",
             linestyle='dashed')
    plt.title("Category Proportions in Clusters")
    plt.xlabel("Cluster")
    plt.ylabel("Proportion")
    plt.legend()
    plt.savefig("../figures/category_top_clusters.png")
    '''
print "SD> INFO: Creating Networkx Graph..."
G = snap.LoadEdgeList(snap.PUNGraph, filepath, 0, 1, edgedelimiter)

print "SD> INFO: Run community detection algorithm"

# Compute the required connected components
CmtyV = snap.TCnComV()

for iter in range(10):
    print iter
    algo_start = time.clock()

    # Clauset-Newman-Moore
    # At every step of the algorithm two communities that contribute maximum positive value to global modularity are merged
    C = snap.CommunityCNM(G, CmtyV)

    algo_stop = time.clock()
    algo_delta = algo_delta + algo_stop - algo_start

#f.close()

proc_stop = time.clock()
proc_delta = proc_stop - proc_start

#nc = np.max([C[com] for com in C]) + 1

print "SD> INFO: Completed"
print "SD> ==========================="
print "SD> INFO: Results"
print "SD> INFO: Number of nodes: %i" % G.GetNodes()
Пример #29
0
import snap as snap
import numpy as np
# Script: run CNM on the edge lists file1.txt .. file8.txt and record, for
# each node id, the first community number it was seen in.
count = 0  # running community number, shared across all input files
file_number = 8
#r = 65608366
r = 65608366  # number of rows of node_list; node ids are used as row index
c = 3
# Columns as written below: [0] node id, [1] community number, [2] flag
# that is 1 once the row has been filled.
node_list = np.zeros((r, c), dtype=np.int64)
print(node_list.shape)
#  "file"+"i"+".txt"
for i in range(file_number):
    f_i = i + 1  # input files are numbered from 1
    G = snap.LoadEdgeList(snap.PUNGraph, "file" + str(f_i) + ".txt", 0, 1)
    # snap.PrintInfo(G, "QA Stats", "qa-info.txt", False)
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityCNM(G, CmtyV)
    # myfile = open('community'+'i'+'.txt', 'w')
    for Cmty in CmtyV:
        count += 1
        print("Community: ")
        # myfile.write("Community" + " "+ str(count)+"\n")
        for NI in Cmty:
            print(NI)
            # Only the first assignment of a node is kept; later files do
            # not overwrite it.
            if node_list[NI][2] == 0:
                node_list[NI][2] = 1
                node_list[NI][1] = count
                node_list[NI][0] = NI
        # myfile.write(str(NI))
        # myfile.write("\n")
mat = np.matrix(node_list)
with open('outfile.txt', 'wb') as f:
def community_cnm(graph):
    """Return the CNM communities of ``graph`` as lists of node ids."""
    detected = sp.TCnComV()
    sp.CommunityCNM(graph, detected)  # modularity return value is unused

    communities = []
    for members in detected:
        communities.append(list(members))
    return communities