Пример #1
0
def read_nets_by_years(path):
    """Load every network matching a glob pattern and order them by date.

    Each matching file is read with ``xnet.xnet2igraph``, every value of its
    ``political_party`` vertex attribute is passed through
    ``filter_pp_name``, and only the giant connected component is kept.

    Args:
        path: Glob pattern selecting the network files to read.

    Returns:
        A ``(dates, nets)`` pair of NumPy arrays of equal length, ordered by
        ascending date. ``dates`` holds the float parsed from each file path
        and ``nets`` the matching giant components.

    Raises:
        IndexError: If a matched path has fewer than three ``'_'``-separated
            fields.
        ValueError: If the date field cannot be converted to ``float``.
    """
    dates = []
    nets = []
    for filename in sorted(glob.glob(path)):
        net = xnet.xnet2igraph(filename)
        net.vs['political_party'] = [
            filter_pp_name(p) for p in net.vs['political_party']
        ]
        nets.append(net.components().giant())

        # The date is the third '_'-separated field of the *whole* path
        # (directory names included), cut at its first '.'.
        dates.append(float(filename.split('_')[2].split('.')[0]))

    dates = np.asarray(dates)
    nets = np.asarray(nets)

    # Reorder both arrays together so they stay aligned by date.
    order = np.argsort(dates)
    return dates[order], nets[order]
Пример #2
0
def xnet_input_to_wordcloud(input_file, output_file):
    """Render a word cloud image from the abstracts stored in a network.

    Args:
        input_file: Path of the xnet file to read.
        output_file: Path the word cloud image is written to.
    """
    cloud_options = {
        "background_color": "white",
        "max_words": 2000,
        "scale": 10,
        "contour_width": 3,
        "contour_color": 'white',
    }
    abstracts = xn.xnet2igraph(input_file).vs["paper_abstract"]
    # One abstract per line forms the text the cloud is generated from.
    corpus = "\n".join(abstracts)

    cloud = WordCloud(**cloud_options)
    cloud.generate(corpus)
    cloud.to_file(output_file)
Пример #3
0
def calculate_dist(filenames):
    """Add a ``distance`` edge attribute to each network and save a copy.

    For every file, each edge weight ``w`` is converted to
    ``sqrt(2 * (1 - w))``. A network with at least one edge is written to a
    new file whose name is the input name minus its last five characters
    (presumably the ``.xnet`` suffix) plus ``_dist.xnet``. For a network
    without edges, ``'error'`` and the file name are printed instead.

    Args:
        filenames: Iterable of paths to xnet files.
    """
    for path in filenames:
        graph = xnet.xnet2igraph(path)
        distances = [
            math.sqrt(2 * (1 - weight)) for weight in graph.es['weight']
        ]
        if not distances:
            print('error', path)
            continue
        graph.es['distance'] = distances
        xnet.igraph2xnet(graph, path[:-5] + "_dist.xnet")
Пример #4
0
def xnet_input_to_communities_wordcloud(input_file,
                                        output_file,
                                        minYear=minYear,
                                        minKCore=minKCore):
    """Save a two-column grid of word clouds, one per selected community.

    The network is read from ``input_file``; vertices whose ``year`` is below
    ``minYear`` or whose ``KCore`` is below ``minKCore`` are removed and only
    the giant weakly connected component is kept. The first
    ``maxCommunities`` entries returned by ``sortByFrequency`` (assumed to be
    ordered most frequent first -- confirm against that helper) each get one
    subplot. Within a community, a word's weight is its in-community count
    divided by ``log(count over all abstracts + 1)``.

    Args:
        input_file: Path of the xnet file to read.
        output_file: Path the figure is saved to.
        minYear: Smallest ``year`` value a vertex may have to be kept.
        minKCore: Smallest ``KCore`` value a vertex may have to be kept.
    """
    network = xn.xnet2igraph(input_file)

    years = np.array(network.vs["year"])
    kcores = np.array(network.vs["KCore"])
    rejected = np.logical_or(years < minYear, kcores < minKCore)
    network.delete_vertices(np.where(rejected)[0])
    network = network.clusters(mode="WEAK").giant()

    topCommunities = sortByFrequency(
        network.vs["Community"])[0:maxCommunities]
    rowCount = math.ceil(len(topCommunities) / 2)
    figure = plt.figure(figsize=(20, 5 * rowCount))

    # Word counts over every remaining abstract, used as the denominator
    # when weighting the per-community counts below.
    corpusFrequencies = WordCloud(max_words=maxAllWords).process_text(
        "\n".join(network.vs["paper_abstract"]))

    # 500x1000 mask that is zero everywhere except a 10-pixel frame of 255
    # along all four edges.
    frameMask = np.zeros((500, 1000), dtype='B')
    for border in (np.s_[:10, :], np.s_[-10:, :], np.s_[:, :10],
                   np.s_[:, -10:]):
        frameMask[border] = 255

    for position, community in enumerate(topCommunities):
        # Communities beyond the palette share a neutral grey.
        if position < len(_styleColors):
            color = _styleColors[position]
        else:
            color = "#aaaaaa"

        members = (vertex for vertex in network.vs
                   if vertex["Community"] == community)
        communityText = "\n".join(
            vertex["paper_abstract"] for vertex in members)

        plt.subplot(rowCount, 2, position + 1)
        cloud = WordCloud(background_color="white",
                          max_words=maxInternalWords,
                          width=1000,
                          height=500,
                          mask=frameMask,
                          contour_width=10,
                          contour_color=color,
                          random_state=3,
                          color_func=generateColorFunction(color))

        weighted = {}
        for word, count in cloud.process_text(communityText).items():
            if word in corpusFrequencies:
                weighted[word] = count / math.log(corpusFrequencies[word] + 1)
        cloud.generate_from_frequencies(weighted)

        plt.imshow(cloud, interpolation='bilinear')
        plt.axis("off")

    plt.tight_layout()
    plt.savefig(output_file)
    plt.close(figure)
def get_areas_all_years(path='data/subj_areas/nets/all.xnet'):
    """Load a subject-area network and print its multilevel communities.

    The vertex attribute names are printed first. Then, for each community
    found by ``community_multilevel`` using the ``weight`` edge attribute,
    the ``name`` of every member vertex is printed on one line, each name
    followed by a space.

    Args:
        path: Path of the xnet file to read. Defaults to the aggregated
            network this function originally hard-coded.

    Returns:
        The loaded graph, unmodified.
    """
    g = xnet.xnet2igraph(path)
    print(g.vs.attributes())

    for community in g.community_multilevel(weights='weight'):
        for idx in community:
            print(g.vs[idx]['name'], end=' ')
        print()

    return g
def get_areas_by_year():
    """Chain subject-area communities across consecutive yearly networks.

    The ``all_with_comm_*.xnet`` files are loaded in sorted file-name order;
    the first one is taken to be year 2008 and each following file the next
    year. For every pair of consecutive networks, each community of the
    earlier year is linked to the later-year community with the highest
    ``jaccard`` similarity of vertex names, provided that similarity is at
    least 0.2. The similarity list of every accepted link is printed.

    Returns:
        A list of chains. Each chain is a list of ``(year, community)``
        tuples following one community through successive years. Returns an
        empty list when no file matches.
    """
    # glob returns paths in arbitrary order; the year bookkeeping below
    # relies on the files being visited chronologically.
    files = sorted(glob.glob('data/subj_areas/nets/all_with_comm_*.xnet'))
    if not files:
        return []

    gs = [xnet.xnet2igraph(file) for file in files]

    first_year = 2008
    map_to_all = [[(first_year, c)] for c in set(gs[0].vs['comm'])]

    # NOTE(review): with this bound the last two loaded networks are never
    # used as the later year of a pair -- confirm this is intentional.
    for i in range(len(gs) - 3):
        # Derive the year from the position so that skipping a pair below
        # cannot leave it out of step with the files.
        year = first_year + i

        comms1 = defaultdict(list)
        comms2 = defaultdict(list)
        for v in gs[i].vs:
            comms1[v['comm']].append(v['name'])
        for v in gs[i + 1].vs:
            comms2[v['comm']].append(v['name'])

        if not comms1 or not comms2:
            continue

        for c1, vtxs1 in comms1.items():
            sims = [(jaccard(vtxs1, vtxs2), c2)
                    for c2, vtxs2 in comms2.items()]
            s_max = max(sims)

            if s_max[0] < 0.2:
                continue
            print(sims)

            link = (year + 1, s_max[1])
            # Extend the chain that currently ends at this community, or
            # start a new chain when none does.
            for chain in map_to_all:
                if chain[-1] == (year, c1):
                    chain.append(link)
                    break
            else:
                map_to_all.append([(year, c1), link])

    return map_to_all
Пример #7
0
def xnet_input_to_wordcloud(input_file, output_file):
    """Render a word cloud image from the titles (and abstracts) of a network.

    When the network has a ``paper_abstract`` vertex attribute, each document
    is ``"<title>. <abstract>"``; otherwise the titles alone are used.

    Args:
        input_file: Path of the xnet file to read.
        output_file: Path the word cloud image is written to.
    """
    network = xn.xnet2igraph(input_file)
    cloud = WordCloud(background_color="white",
                      max_words=2000,
                      scale=10,
                      contour_width=3,
                      contour_color='white')

    titles = network.vs["original_title"]
    if "paper_abstract" in network.vertex_attributes():
        documents = [
            "%s. %s" % (title, abstract)
            for title, abstract in zip(titles, network.vs["paper_abstract"])
        ]
    else:
        documents = titles

    # One document per line forms the text the cloud is generated from.
    cloud.generate("\n".join(documents))
    cloud.to_file(output_file)
Пример #8
0
def main():
    """Export selected vertex attributes and adjacency lists to Feather.

    Reads ``../data/wosAPSWithPACS_WithMAG_raw.xnet`` and writes a table with
    the columns ``abstract``, ``year``, ``language`` and ``adj_list`` (one row
    per vertex) to ``data/all_data.feather``.
    """
    graph = xnet2igraph('../data/wosAPSWithPACS_WithMAG_raw.xnet')
    vertices = graph.vs

    # Output column name -> vertex attribute it is read from.
    attribute_columns = (
        ('abstract', 'Title and Abstract'),
        ('year', 'Year Published'),
        ('language', 'Language'),
    )
    columns = {
        column: vertices[attribute]
        for column, attribute in attribute_columns
    }
    columns['adj_list'] = graph.get_adjlist()

    frame = pd.DataFrame.from_dict(columns)
    # Each adjacency list is stored as its string form.
    frame['adj_list'] = frame['adj_list'].astype(str)

    frame.to_feather('data/all_data.feather')
def plot_(gauss, bins, map_to_all):
    """Plot smoothed histograms of ``incr``/``decr`` values per community chain.

    One subplot row is created per entry of ``map_to_all``. Chains with a
    single ``(year, community)`` pair are skipped and their row stays empty.
    For every pair of a chain, the papers in the module-level
    ``complete_data`` that have at least one subject area among the
    community's vertex names are collected and passed to ``incr_decr``; the
    values it returns are pooled over the chain, histogrammed over the range
    2008-2020, convolved with ``gauss`` and drawn as two bar series. The
    figure is saved to ``hist_areas.pdf``.

    Args:
        gauss: Kernel the histograms are convolved with (``mode='same'``).
        bins: ``bins`` argument forwarded to ``np.histogram``.
        map_to_all: Sequence of chains, each a sequence of
            ``(year, community)`` pairs.

    Raises:
        IndexError: If no network file exists for a year in a chain.
    """
    # squeeze=False keeps the axes two-dimensional even with a single row,
    # so indexing by row works for any number of chains.
    fig, axs = plt.subplots(len(map_to_all),
                            1,
                            sharex=True,
                            sharey=True,
                            figsize=(12, 3 * len(map_to_all)),
                            squeeze=False)
    axs = axs[:, 0]

    for i, group in enumerate(map_to_all):
        if len(group) == 1:
            continue
        print(i, group)
        incr, decr = [], []
        for year, c in group:
            f = glob.glob('data/subj_areas/nets/all_with_comm_%s.xnet' %
                          year)[0]
            g = xnet.xnet2igraph(f)
            words = set(g.vs.select(comm_eq=c)['name'])
            # Papers with at least one subject area in this community.
            dois = {
                doi
                for doi, paper in complete_data.items()
                if any(w in words for w in paper['infos']['subj_areas'])
            }

            incr_temp, decr_temp = incr_decr(dois, year, year + 1)
            incr += incr_temp
            decr += decr_temp

        range0 = (2008, 2020)
        hist0, bins_edges0 = np.histogram(incr, bins=bins, range=range0)
        hist1, bins_edges1 = np.histogram(decr, bins=bins, range=range0)

        y0 = np.convolve(hist0, gauss, mode='same')
        y1 = np.convolve(hist1, gauss, mode='same')
        # linspace guarantees exactly len(y0) positions; arange with a float
        # step can produce one extra element through rounding.
        x = np.linspace(bins_edges0[0],
                        bins_edges1[-1],
                        num=len(y0),
                        endpoint=False)
        axs[i].set_title("%d - %d" % (group[0][0], group[-1][0]))
        axs[i].bar(x - 0.05, y0, width=0.05, label='incr')
        axs[i].bar(x, y1, width=0.05, label='decr')
        axs[i].legend()

    plt.tight_layout()
    plt.savefig('hist_areas.pdf')
    # Release the figure once it is on disk.
    plt.close(fig)
Пример #10
0
def xnet_input_to_figure(input_file,
                         output_file,
                         minYear=minYear,
                         minKCore=minKCore):
    """Draw the filtered giant component of a network and save it as a figure.

    The giant weakly connected component is taken, vertices whose ``year``
    is below ``minYear`` or whose ``KCore`` is below ``minKCore`` are
    removed, and the giant component is taken again. Vertex sizes grow
    linearly with indegree from 4 to 14. Vertices are coloured by
    ``Community`` when that attribute exists (palette ``_styleColors`` in
    the order returned by ``sortByFrequency``, ``"#aaaaaa"`` beyond the
    palette), otherwise by the ``plasma`` colormap applied to
    ``log(indegree + 1)``. Each edge takes its source vertex's colour with
    ``"20"`` appended.

    Args:
        input_file: Path of the xnet file to read.
        output_file: Path the figure is saved to.
        minYear: Smallest ``year`` value a vertex may have to be kept.
        minKCore: Smallest ``KCore`` value a vertex may have to be kept.
    """
    originalGraph = xn.xnet2igraph(input_file)
    graph = originalGraph.clusters(mode="WEAK").giant()
    verticesToDelete = np.where(
        np.logical_or(
            np.array(graph.vs["year"]) < minYear,
            np.array(graph.vs["KCore"]) < minKCore))[0]
    graph.delete_vertices(verticesToDelete)
    graph = graph.clusters(mode="WEAK").giant()

    indegree = graph.indegree()
    maxIndegree = max(indegree)
    # When no vertex has an incoming edge every indegree is 0; dividing by 1
    # then yields the base size instead of a ZeroDivisionError.
    sizeScale = maxIndegree or 1
    graph.vs["vertex_size"] = [x / sizeScale * 10 + 4 for x in indegree]

    colormap = plt.get_cmap("plasma")

    if "Community" not in graph.vertex_attributes():
        graph.vs["color"] = [
            convertColorToRGBAString(*colormap(math.log(value + 1)))
            for value in indegree
        ]
    else:
        communities = graph.vs["Community"]
        sortedCommunities = sortByFrequency(communities)
        communityToColor = {
            community:
            (_styleColors[index] if index < len(_styleColors) else "#aaaaaa")
            for index, community in enumerate(sortedCommunities)
        }
        graph.vs["color"] = [
            communityToColor[community] for community in communities
        ]

    # Read the vertex colours once; fetching graph.vs["color"] inside the
    # loop would rebuild the whole list for every edge.
    vertexColors = graph.vs["color"]
    for edge in graph.es:
        # presumably "20" is a hex alpha suffix for "#rrggbb" colours --
        # TODO confirm against drawGraph.
        edge['color'] = vertexColors[edge.source] + "20"

    fig, ax = plt.subplots(figsize=(10, 10))
    drawGraph(graph, ax)
    plt.axis("off")
    plt.savefig(output_file)
    plt.close(fig)
Пример #11
0
    source_by_year = 'data/1991-2019/by_year/dep_*_obstr_0.8_leidenalg'
    source_by_mandate = 'data/1991-2019/mandate/dep_*_0.8'

    # Called only once
    source = 'data/1991-2019/by_year/dep_*_obstr_0.8_leidenalg'
    filenames = glob.glob(source + '.xnet')
    calculate_dist(filenames)

    filenames_by_year = sorted(glob.glob(source_by_year + '_dist.xnet'))
    filenames_by_mandate = sorted(glob.glob(source_by_mandate + '_dist.xnet'))

    dates_by_year, dates_by_mandate = [], []
    nets_by_year, nets_by_mandate = [], []

    for filename in filenames_by_year:
        net = xnet.xnet2igraph(filename)
        net.vs['political_party'] = [
            filter_pp_name(p) for p in net.vs['political_party']
        ]
        nets_by_year.append(net.components().giant())

        date = int(filename.split('dep_')[1].split('_')[0])
        dates_by_year.append(date)

    for filename in filenames_by_mandate:
        net = xnet.xnet2igraph(filename)
        net.vs['political_party'] = [
            filter_pp_name(p) for p in net.vs['political_party']
        ]
        nets_by_mandate.append(net.components().giant())