Example #1
def analyze_dynamics(gg, chronological=False, weighted=False):
    seq = infinite_jest_utils.get_section_sequence(chronological)
    out_csv_name = 'dynamics-chronological_{}-weighted_{}.csv'.format(chronological, weighted)

    # geodesic_vs_degree
    csvf = open_csv_file(out_csv_name)
    if not csvf:
        return
    writer = csv.writer(csvf)
    writer.writerow(["Section", "n", "avg degree", "avg geodesic len", "num components", "largest component size"])

    # for each section, calculate avg degree & mean geodesic
    for i, G in enumerate(gg.graph_by_sections(seq, aggregate=True)):
        num_components = 1
        largest_component = (-1,0)
        try:
            if weighted:
                avg_len = nx.average_shortest_path_length(G, weight="weight")
            else:
                avg_len = nx.average_shortest_path_length(G)
        except nx.NetworkXError:
            # Graph is disconnected: fall back to the largest connected
            # component and report its mean geodesic instead.
            avg_len = 0
            num_components = 0
            for ci,C in enumerate(nx.connected_components(G)):
                ni = len(C)
                if ni > largest_component[1]:
                    subG = G.subgraph(C)
                    if weighted:
                        avg_len = nx.average_shortest_path_length(subG, weight="weight")
                    else:
                        avg_len = nx.average_shortest_path_length(subG)
                    largest_component = (ci, ni)
                num_components += 1

            # restrict the degree statistics below to the largest component
            G = subG

        if weighted:
            degs = [k for (node,k) in G.degree(weight='weight')]
        else:
            degs = [k for (node,k) in G.degree()]
        avg_degree = sum(degs) / len(degs)
        n = nx.number_of_nodes(G)
        largest_component_size = n if num_components == 1 else largest_component[1]
        print("Section:{}\tn:{}\n  avg deg:{}, avg geodesic:{}, num_components={}"
            .format(seq[i], n, avg_degree, avg_len, num_components))
        writer.writerow([seq[i], n, avg_degree, avg_len, num_components, largest_component_size])
    csvf.close()
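A minimal invocation sketch for the function above; Graphify(edge_thresh=50) is taken from Example #6, and it assumes csv, networkx (as nx), infinite_jest_utils, and open_csv_file are available at module level:

gg = Graphify(edge_thresh=50)  # character co-occurrence graph builder (see Example #6)
analyze_dynamics(gg)                                     # book ordering, unweighted
analyze_dynamics(gg, chronological=True, weighted=True)  # chronological, weighted geodesics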
Example #2
def analyze_neighborhood(gg, chronological=False):
    print("Neighborhood stability:")
    seq = infinite_jest_utils.get_section_sequence(chronological)
    stabilities = algos.neighborhood_stabilities(gg, seq)

    with open(os.path.join(ANALYSIS_PATH, 'neighborhood_stabilities-chronological_{}.json'.format(chronological)), 'w') as f:
        json.dump(stabilities, f)
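The stability metric itself lives in algos.neighborhood_stabilities and is not shown here. One common way to quantify neighborhood stability is the Jaccard overlap of a node's neighbor sets in consecutive section graphs; the sketch below illustrates that idea only and is not necessarily what algos implements:

def neighborhood_jaccard(G_prev, G_curr, node):
    # Jaccard similarity of a node's neighbor sets in two networkx graphs.
    if node not in G_prev or node not in G_curr:
        return None
    prev_neighbors = set(G_prev.neighbors(node))
    curr_neighbors = set(G_curr.neighbors(node))
    if not prev_neighbors and not curr_neighbors:
        return 1.0
    return len(prev_neighbors & curr_neighbors) / len(prev_neighbors | curr_neighbors)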
Example #3
def analyze_gender(gg, weighted=True):
    sequence = infinite_jest_utils.get_section_sequence()
    # Iterate to the final aggregated graph, which covers the whole book.
    for G in gg.graph_by_sections(sequence, aggregate=True):
        last_G = G

    degrees_by_gender = defaultdict(lambda: defaultdict(list))

    degrees = {
        'unweighted' : {node: degree for node, degree in last_G.degree()},
        'weighted' : {node: degree for node, degree in last_G.degree(weight='weight')}
    }

    for node in last_G.nodes():
        for degree_type, degree_type_vals in degrees.items():
            degree = degree_type_vals[node]

            degrees_by_gender[degree_type]['overall'].append(degree)
        
            gender = last_G.nodes[node].get('gender', 'unknown')

            if gender is None:
                gender = 'unknown'

            degrees_by_gender[degree_type][gender].append(degree)


    calculated = defaultdict(dict)
    for degree_type, degree_type_vals in degrees_by_gender.items():
        for key, vals in degree_type_vals.items():
            avg = sum(vals) / len(vals)

            calculated[degree_type][key] = avg

    with open(os.path.join(ANALYSIS_PATH, 'gender.json'), 'w') as f:
        json.dump(calculated, f)
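A self-contained toy illustration of the grouping pattern used above (average degree per value of a node attribute, plus an 'overall' bucket); the graph and attribute values here are made up for the example:

import networkx as nx
from collections import defaultdict

G = nx.Graph()
G.add_nodes_from([(1, {'gender': 'f'}), (2, {'gender': 'm'}), (3, {})])
G.add_edges_from([(1, 2), (1, 3)])

by_gender = defaultdict(list)
for node, degree in G.degree():
    by_gender['overall'].append(degree)
    by_gender[G.nodes[node].get('gender', 'unknown')].append(degree)

averages = {k: sum(v) / len(v) for k, v in by_gender.items()}
print(averages)  # {'overall': 1.33..., 'f': 2.0, 'm': 1.0, 'unknown': 1.0}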
Example #4
def section_bars():
    chronological = infinite_jest_utils.get_section_sequence(
        chronological=True)
    booktime = infinite_jest_utils.get_section_sequence(chronological=False)

    # Make a figure and axes with dimensions as desired.
    fig = plt.figure(figsize=(8, 3))
    ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.15])
    ax2 = fig.add_axes([0.05, 0.475, 0.9, 0.15])

    rainbow = matplotlib.cm.rainbow(np.linspace(0, 1, 192))
    norm = matplotlib.colors.Normalize(vmin=1, vmax=192)
    bounds = [1, 192]

    # booktime is the inverse permutation of chronological (both 1-indexed).
    booktime = [None for _ in range(len(chronological))]
    new_chronological = [i + 1 for i in range(len(chronological))]
    for book, chrono in enumerate(chronological):
        booktime[chrono - 1] = book + 1

    chronological_colormap = [rainbow[i - 1] for i in new_chronological]

    cb1 = matplotlib.colorbar.ColorbarBase(
        ax1,
        cmap=matplotlib.colors.ListedColormap(chronological_colormap),
        norm=norm,
        orientation='horizontal',
        ticks=bounds,
    )
    cb1.set_label('chronological ordering')

    booktime_colormap = [rainbow[i - 1] for i in booktime]

    cb2 = matplotlib.colorbar.ColorbarBase(
        ax2,
        cmap=matplotlib.colors.ListedColormap(booktime_colormap),
        norm=norm,
        orientation='horizontal',
        ticks=bounds,
    )
    cb2.set_label('book ordering')

    plt.savefig(os.path.join(PLOTS_PATH, 'section_bars.png'))
    plt.show()
    plt.close(fig)
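A toy, self-contained illustration of the 1-indexed permutation inversion that builds booktime in section_bars:

chronological = [3, 1, 2]
booktime = [None] * len(chronological)
for book, chrono in enumerate(chronological):
    booktime[chrono - 1] = book + 1
print(booktime)  # [2, 3, 1]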
Example #5
def analyze_communities(gg):
    sequence = infinite_jest_utils.get_section_sequence()
    # Iterate to the final aggregated graph, which covers the whole book.
    for G in gg.graph_by_sections(sequence, aggregate=True):
        last_G = G

    auto_communities = generate_greedy_modularity_communities(last_G)

    labeled_communities = defaultdict(list)
    for node in last_G.nodes():
        community = last_G.nodes[node].get('association')
        name = last_G.nodes[node]['name']

        if not community:
            community = name

        labeled_communities[community].append(name)

    labeled_communities = [frozenset(l) for l in labeled_communities.values()]
    result = algos.normalized_mutual_information(auto_communities, labeled_communities)
    print(result)

    # Build a 0/1 adjacency matrix over dense integer node ids for the
    # modularity calculation below.
    num_nodes = len(list(last_G.nodes()))
    matrix = [[0 for _ in range(num_nodes)] for _ in range(num_nodes)]
    unused_id = 0
    node_ids = {}
    for node in last_G.nodes():
        if node not in node_ids:
            node_ids[node] = unused_id
            unused_id += 1

        for _node in last_G.neighbors(node):
            if _node not in node_ids:
                node_ids[_node] = unused_id
                unused_id += 1

            matrix[node_ids[node]][node_ids[_node]] = 1

    indexed_labeled_communities = []
    for l in labeled_communities:
        indexed_labeled_communities.append([node_ids[x] for x in l])

    q = algos.calculate_modularity(matrix, indexed_labeled_communities)
    print(q)
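algos.calculate_modularity is not shown above; the standard (Newman) modularity for an unweighted adjacency matrix is Q = (1/2m) * sum_ij (A_ij - k_i*k_j/2m) * delta(c_i, c_j). The sketch below computes that quantity from the same matrix/community-index inputs, as an assumption about what the helper does rather than a copy of it:

def modularity_from_matrix(adj, communities):
    # adj: square 0/1 adjacency matrix; communities: lists of node indices.
    degrees = [sum(row) for row in adj]
    two_m = sum(degrees)  # 2m = total degree = twice the edge count
    if two_m == 0:
        return 0.0
    q = 0.0
    for community in communities:
        for i in community:
            for j in community:
                q += adj[i][j] - degrees[i] * degrees[j] / two_m
    return q / two_m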
Example #6
                        help="chronological or book time")

    args = parser.parse_args()

    gg = Graphify(edge_thresh=50)

    web = webweb()
    web.display.colorBy = 'degree'
    web.display.sizeBy = 'degree'
    web.display.l = 60
    web.display.c = 120
    web.display.scaleLinkWidth = True
    # web.display.w = 600
    # web.display.h = 600

    section_sequence = infinite_jest_utils.get_section_sequence(
        chronological=False)
    for G in gg.graph_by_sections(section_sequence,
                                  aggregate=True,
                                  decay_weights=True,
                                  stability=40):
        web.networks.infinite_jest.add_layer_from_networkx_graph(G)

    section_sequence = infinite_jest_utils.get_section_sequence(
        chronological=True)
    for G in gg.graph_by_sections(section_sequence,
                                  aggregate=True,
                                  decay_weights=True,
                                  stability=40):
        web.networks.infinite_jest_chronological.add_layer_from_networkx_graph(
            G)