Exemplo n.º 1
0
def _rolling_cpm_greedy_pairs(old_candidates, new_candidates):
    """
    Greedily pair candidate communities of two successive snapshots.

    Every (old, new) couple is scored with ``_singleJaccard``; couples are then accepted by
    decreasing score, as long as none of their two communities belongs to a couple that was
    accepted earlier.

    :param old_candidates: mapping whose keys are the candidate communities of the previous snapshot
    :param new_candidates: mapping whose keys are the candidate communities of the current snapshot
    :return: a tuple (matched, unmatched_old, unmatched_new): the list of accepted (old, new)
        couples, and copies of both input mappings without the accepted communities
    """
    # A list and a stable sort (rather than a set) keep the order of equally
    # scored couples reproducible from one run to the next.
    scored_pairs = [((old_c, new_c), _singleJaccard(old_c, new_c))
                    for old_c in old_candidates
                    for new_c in new_candidates]
    scored_pairs.sort(key=itemgetter(1), reverse=True)

    unmatched_old = dict(old_candidates)
    unmatched_new = dict(new_candidates)
    matched = []
    used = set()  # communities already taken by an accepted couple
    for (old_c, new_c), _score in scored_pairs:
        if old_c in used or new_c in used:
            continue
        matched.append((old_c, new_c))
        used.update((old_c, new_c))
        del unmatched_old[old_c]
        del unmatched_new[new_c]
    return matched, unmatched_old, unmatched_new


def rollingCPM(dynNetSN: DynGraphSN, k=3):
    """

    This method is based on Palla et al[1]. It first computes overlapping snapshot_communities in each snapshot based on the
    clique percolation algorithm, and then match snapshot_communities in successive steps using a method based on the
    union graph.

    For each pair of successive snapshots, the communities of the union graph are computed and,
    for each of them, the communities of both snapshots associated to it by ``_included`` are
    greedily matched by decreasing Jaccard value. Matched couples yield "continue" events;
    a leftover community yields "merged" (old side) or "split" (new side) events.

    [1] Palla, G., Barabási, A. L., & Vicsek, T. (2007).
    Quantifying social group evolution.
    Nature, 446(7136), 664.

    :param dynNetSN: a dynamic network (DynGraphSN)
    :param k: the size of cliques used as snapshot_communities building blocks
    :return: DynCommunitiesSN
    """

    DynCom = DynCommunitiesSN()
    old_communities = None  # None until the first snapshot has been processed
    old_graph = None
    dateOld = None

    for date, graph in dynNetSN.snapshots().items():
        # percolated cliques of the snapshot, materialised as a list
        communitiesAtT = list(_get_percolated_cliques(graph, k))
        for community in communitiesAtT:
            DynCom.add_community(date, community)

        # Matching needs a previous snapshot and at least one current community.
        if old_communities is not None and communitiesAtT:
            # union graph of the previous and the current snapshot
            union_graph = nx.compose(old_graph, graph)
            communities_union = list(_get_percolated_cliques(union_graph, k))

            # For each community of the union graph, the candidate communities
            # before / after (only the keys of the inner mappings are used here).
            included_before = _included(old_communities, communities_union)
            included_after = _included(communitiesAtT, communities_union)

            for union_com in included_before:
                old_candidates = included_before[union_com]
                new_candidates = included_after[union_com]
                matched, unmatched_old, unmatched_new = _rolling_cpm_greedy_pairs(
                    old_candidates, new_candidates)

                for old_c, new_c in matched:
                    DynCom.events.add_event(
                        (dateOld, DynCom._com_ID(dateOld, old_c)),
                        (date, DynCom._com_ID(date, new_c)), dateOld,
                        date, "continue")

                # NOTE(review): only the first leftover community on each side
                # is reported, as in the original implementation -- confirm
                # whether every leftover should produce events.
                if unmatched_old:
                    # an unmatched old community is "merged" into all new candidates
                    killed = next(iter(unmatched_old))
                    for new_c in new_candidates:
                        DynCom.events.add_event(
                            (dateOld, DynCom._com_ID(dateOld, killed)),
                            (date, DynCom._com_ID(date, new_c)), dateOld,
                            date, "merged")

                if unmatched_new:
                    # an unmatched new community is a "split" of all old candidates
                    born = next(iter(unmatched_new))
                    for old_c in old_candidates:
                        DynCom.events.add_event(
                            (dateOld, DynCom._com_ID(dateOld, old_c)),
                            (date, DynCom._com_ID(date, born)), dateOld, date,
                            "split")

        # the current snapshot becomes the reference for the next one
        old_graph = graph
        dateOld = date
        old_communities = communitiesAtT

    DynCom._relabel_coms_from_continue_events()

    return DynCom
Exemplo n.º 2
0
def _rolling_cpm_greedy_pairs_excluding(old_candidates, new_candidates, excluded):
    """
    Greedily pair candidate communities of two successive snapshots.

    Every (old, new) couple in which neither community is in ``excluded`` is scored with
    ``_singleJaccard``; couples are then accepted by decreasing score, as long as none of
    their two communities belongs to a couple that was accepted earlier.

    :param old_candidates: mapping whose keys are the candidate communities of the previous snapshot
    :param new_candidates: mapping whose keys are the candidate communities of the current snapshot
    :param excluded: container of communities that must not be paired again
    :return: a tuple (matched, unmatched_old, unmatched_new): the list of accepted (old, new)
        couples, and copies of both input mappings without the accepted communities
        (excluded communities are NOT removed from these copies)
    """
    # A list and a stable sort (rather than a set) keep the order of equally
    # scored couples reproducible from one run to the next.
    scored_pairs = [((old_c, new_c), _singleJaccard(old_c, new_c))
                    for old_c in old_candidates
                    for new_c in new_candidates
                    if old_c not in excluded and new_c not in excluded]
    scored_pairs.sort(key=itemgetter(1), reverse=True)

    unmatched_old = dict(old_candidates)
    unmatched_new = dict(new_candidates)
    matched = []
    used = set()  # communities already taken by an accepted couple
    for (old_c, new_c), _score in scored_pairs:
        if old_c in used or new_c in used:
            continue
        matched.append((old_c, new_c))
        used.update((old_c, new_c))
        del unmatched_old[old_c]
        del unmatched_new[new_c]
    return matched, unmatched_old, unmatched_new


def rollingCPM(dynNetSN: DynGraphSN, k=3, elapsed_time=False):
    """

    This method is based on Palla et al[1]. It first computes overlapping snapshot_communities in each snapshot based on the
    clique percolation algorithm, and then match snapshot_communities in successive steps using a method based on the
    union graph.

    The per-snapshot communities are computed in parallel with a process pool (one task per
    snapshot, through ``__compute_communities``); the matching of successive snapshots is
    then done sequentially.

    [1] Palla, G., Barabási, A. L., & Vicsek, T. (2007).
    Quantifying social group evolution.
    Nature, 446(7136), 664.

    :param dynNetSN: a dynamic network (DynGraphSN)
    :param k: the size of cliques used as snapshot_communities building blocks
    :param elapsed_time: if True, will return a tuple (communities,time_elapsed), where
        time_elapsed is a dict with keys "total", "CD" (parallel community detection)
        and "match" (registration and matching of every snapshot but the first, including
        the clique percolation of the union graphs), all in seconds
    :return: DynCommunitiesSN, or (DynCommunitiesSN, dict) if elapsed_time is True
    """

    DynCom = DynCommunitiesSN()
    graphs = dynNetSN.snapshots()

    start = time.time()

    # The context manager guarantees the workers are released even if a task raises.
    with mp.Pool(mp.cpu_count()) as pool:
        allComs = pool.starmap(__compute_communities,
                               [(SNt, dynNetSN.snapshots(SNt), k)
                                for SNt in graphs])
    # Detection time is measured once, around the parallel phase only.
    total_percolation = time.time() - start

    total_match = 0
    com_ids = {}  # (date, community) -> identifier returned by add_community
    old_communities = None  # None until the first snapshot has been processed
    old_graph = None
    dateOld = None

    for date, communitiesAtT in allComs:
        step_start = time.time()
        for community in communitiesAtT:
            com_ids[(date, community)] = DynCom.add_community(date, community)

        if old_communities is not None:
            if communitiesAtT:  # nothing to match without current communities
                # union graph of the previous and the current snapshot
                union_graph = nx.compose(old_graph, graphs[date])
                communities_union = list(
                    _get_percolated_cliques(union_graph, k))

                # For each community of the union graph, the candidate communities
                # before / after (only the keys of the inner mappings are used here).
                included_before = _included(old_communities, communities_union)
                included_after = _included(communitiesAtT, communities_union)

                # communities already part of a "continue" event for this step
                already_assigned = set()
                for union_com in included_before:
                    old_candidates = included_before[union_com]
                    new_candidates = included_after[union_com]
                    matched, unmatched_old, unmatched_new = (
                        _rolling_cpm_greedy_pairs_excluding(
                            old_candidates, new_candidates, already_assigned))

                    for old_c, new_c in matched:
                        already_assigned.add(old_c)
                        already_assigned.add(new_c)
                        DynCom.events.add_event(
                            (dateOld, com_ids[(dateOld, old_c)]),
                            (date, com_ids[(date, new_c)]), dateOld, date,
                            "continue")

                    # NOTE(review): only the first leftover community on each
                    # side is reported, as in the original implementation --
                    # confirm whether every leftover should produce events.
                    if unmatched_old:
                        # an unmatched old community is "merged" into all new candidates
                        killed = next(iter(unmatched_old))
                        for new_c in new_candidates:
                            DynCom.events.add_event(
                                (dateOld, com_ids[(dateOld, killed)]),
                                (date, com_ids[(date, new_c)]), dateOld, date,
                                "merged")

                    if unmatched_new:
                        # an unmatched new community is a "split" of all old candidates
                        born = next(iter(unmatched_new))
                        for old_c in old_candidates:
                            DynCom.events.add_event(
                                (dateOld, com_ids[(dateOld, old_c)]),
                                (date, com_ids[(date, born)]), dateOld, date,
                                "split")
            total_match += time.time() - step_start

        # the current snapshot becomes the reference for the next one
        old_graph = graphs[date]
        dateOld = date
        old_communities = communitiesAtT

    time_Steps = {
        "total": time.time() - start,
        "CD": total_percolation,
        "match": total_match,
    }

    DynCom._relabel_coms_from_continue_events()

    if elapsed_time:
        return (DynCom, time_Steps)
    return DynCom