Example #1
def plot_grouped_adjacency():
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    for l, (g, title) in enumerate([(g_cs, "Computer Science")]):
        # Vertices are ordered by prestige in the dataset
        adj = nx.to_numpy_array(g, dtype=int)

        # Scale adjacency matrix by a vertex's outdegree.
        # Edges i -> j are from row_i -> col_j
        groups = np.linspace(0, 100, 11)
        grouped_by_row = []
        for i, row in enumerate(adj):
            in_edges = []
            for rank, edges in enumerate(row):
                for j in range(int(edges)):
                    in_edges.append(rank)
            grouped_row, _ = np.histogram(in_edges, groups)
            grouped_by_row.append(grouped_row)

        grouped = [np.zeros(len(groups) - 1) for i in range(len(groups) - 1)]
        for i, row in enumerate(grouped_by_row):
            for j in range(len(groups) - 1):
                if i <= groups[j + 1]:
                    for k, elem in enumerate(row):
                        grouped[j][k] += elem
                    break

        colors = iter(cm.rainbow(np.linspace(0, 1, 3)))
        r, g, b = next(colors)[:3]  # Unpack RGB vals (0. to 1., not 0 to 255).
        cdict = {
            'red': ((0.0, 1.0, 1.0), (1.0, r, r)),
            'green': ((0.0, 1.0, 1.0), (1.0, g, g)),
            'blue': ((0.0, 1.0, 1.0), (1.0, b, b))
        }
        custom_cmap = LinearSegmentedColormap('custom_cmap', cdict)
        cax = ax.matshow(grouped, cmap=custom_cmap)

        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

        labels = ['%.0f' % group for group in groups]
        ax.set_xticklabels(labels, fontsize=12)
        ax.set_yticklabels(labels, fontsize=12)

        if l == 0:
            ax.set_ylabel(r"Prestige of PhD Institution, $\pi$", fontsize=16)
            ax.set_xlabel(r"Prestige of Hiring Institution, $\pi$",
                          fontsize=16)

    plot_utils.finalize(ax)

    plt.tight_layout()
    plt.savefig("results/grouped_adjacency.eps", format='eps', dpi=1000)
    plt.clf()
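These examples assume a shared preamble of imports plus dataset globals (g_cs, meta_cs) and cache paths defined elsewhere in the source project. A plausible import block, inferred from the names used throughout, would be:

# Imports the examples below appear to rely on (assumed; not shown in the originals)
import csv
import math
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import networkx as nx
import numpy as np
import statsmodels.api as sm
from matplotlib import cm
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.lines import Line2D
from scipy.optimize import curve_fit
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression

import plot_utils  # project-local helper module (see the sketch after Example #2)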
Example #2
def plot_centrality():
    colors = iter(cm.rainbow(np.linspace(0, 1, 3)))
    markers = Line2D.filled_markers
    fig = plt.figure(figsize=(6.0, 4.))
    ax = plt.gca()

    for i, (faculty_graph, school_metadata, dept) in enumerate([(g_cs, meta_cs, "Computer Science")]):
        x = []; y = []
        max_pi = 0
        max_c = 0
        ccs = sorted(nx.strongly_connected_components(faculty_graph), key=len, reverse=True)
        cc = ccs[0]
        for vertex in cc:
            c = 0
            path_lengths = list(
                nx.single_source_shortest_path_length(
                    faculty_graph, source=vertex).values())
            if len(path_lengths) > 0:
                c = np.nanmean(path_lengths)
            label = school_metadata[vertex]['institution']
            x.append(school_metadata[vertex]['pi'])
            y.append(c)
            if school_metadata[vertex]['pi'] > max_pi:
                max_pi = school_metadata[vertex]['pi']
            if c > max_c:
                max_c = c

            # Annotate selected schools: gray label over a thicker white halo copy
            annotations = {
                'MIT': ('MIT', (50, 50), 2),
                'University of Colorado, Boulder':
                    ('University of Colorado,\nBoulder', (25, -45), 4),
                'New Mexico State University':
                    ('New Mexico\nState University', (25, -100), 6),
            }
            if label in annotations:
                text, offset, z = annotations[label]
                for color, lw, zorder in [('white', '2.1', z - 1), ('0.4', None, z)]:
                    arrowprops = {'arrowstyle': '-', 'color': color}
                    if lw is not None:
                        arrowprops['lw'] = lw
                    plt.annotate(text, xy=(school_metadata[vertex]['pi'], c),
                                 color=color, xytext=offset,
                                 textcoords='offset points', ha='center',
                                 va='bottom', arrowprops=arrowprops,
                                 fontsize=plot_utils.LEGEND_SIZE, zorder=zorder)
        ax.scatter(x, y, edgecolor='w', clip_on=False, zorder=1, color=next(colors), s=28)

        slope, intercept, r_value, p_value, std_err = linregress(x, y)
        plt.plot([0, max(x)], [slope * i + intercept for i in [0, max(x)]],
                 color=plot_utils.ALMOST_BLACK,
                 label='Slope: %.4f\n$R^{2}$: %.4f' % (slope, r_value**2),
                 zorder=7)

    plt.xlabel(r'Universities Sorted by Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    plt.ylabel(r'Average Path Length, $\langle \ell \rangle$', fontsize=plot_utils.LABEL_SIZE)

    plot_utils.finalize(ax)
    plt.xlim(0, max_pi)
    plt.ylim(1, max_c)
    plt.legend(loc='upper left', fontsize=plot_utils.LEGEND_SIZE, frameon=False)
    plt.savefig("results/centrality.eps", bbox_inches='tight', format='eps', dpi=1000)
    plt.clf()
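The examples also lean on a project-local plot_utils module (LABEL_SIZE, LEGEND_SIZE, ALMOST_BLACK, finalize). A minimal stand-in, assuming finalize only applies light axis cosmetics, might look like the following; the real helper may do more:

# plot_utils.py -- minimal stand-in; the API is inferred from usage, not the original
LABEL_SIZE = 16
LEGEND_SIZE = 12
ALMOST_BLACK = '0.125'


def finalize(ax):
    """Light axis cosmetics applied to every plot (assumed behavior)."""
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.tick_params(direction='out')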
Example #3
def plot_random_hop_size(cache_dirs, ylim=(0, 1)):
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)

    (title, cache_dir) = cache_dirs
    cache = pickle.load(open(cache_dir, 'rb'))
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node_of_dir(cache_dir):
                continue

            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                result = (meta[node]["pi"], avg)
                results_size[p].append(result)

        results_size[p] = sorted(results_size[p], key=lambda x: x[0])

    filtered = sorted(cache["size"].keys())[1::2]
    length_of_results = len(filtered)

    colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
    markers = Line2D.filled_markers
    count = -1
    for p, data in sorted(results_size.items(), key=lambda x: x[0]):
        if p not in filtered:
            continue
        c = next(colors)
        count += 1
        m = markers[count]
        ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(p), s=28,
                   marker=m, edgecolor='w', clip_on=False, zorder=1)

        x = [pi for (pi, length) in data if not np.isnan(length) and not np.isinf(length)]
        max_pi = max(x)
        if p > 0:
            # Fit a logistic curve to this
            y = [length for (pi, length) in data if not np.isnan(length) and not np.isinf(length)]

            popt, pcov = curve_fit(curve, np.array(x), np.array(y), bounds=(0., [1., 2., 200.]))
            y = curve(x, *popt)

            ax.plot(x, y, color=c)

    ax.set_xlim(0, max_pi)

    ax.set_xlabel(r'University Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Epidemic Size, $\frac{Y}{N}$', fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
        
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=plot_utils.LEGEND_SIZE, title="Jump\nProbability, $q$", scatterpoints=1, frameon=False)
    plt.ylim(ylim)
    plt.savefig('results/size-results-of-ALL-SI-random-hops.eps', bbox_inches='tight', format='eps', dpi=1000)
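Every curve_fit call in these examples passes a module-level curve function that is never shown. Given the "fit a logistic curve" comments and the three-parameter bounds, one plausible (assumed) definition is a logistic with saturation L, steepness k, and midpoint x0:

import numpy as np


# Assumed shape of the undefined `curve` helper; the source project's
# parameterization (signs, ordering of parameters) may differ.
def curve(x, L, k, x0):
    """Three-parameter logistic evaluated elementwise over x."""
    x = np.asarray(x, dtype=float)
    return L / (1.0 + np.exp(-k * (x - x0)))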
Example #4
def plot_size_infection_probability(cache_dirs,
                                    threshold=0.00,
                                    bins=range(0, 100, 10)):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12.0, 4.0), sharey=True)

    (title, cache_dir) = cache_dirs
    cache = pickle.load(open(cache_dir, 'rb'))
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)
    results_size = defaultdict(list)

    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node_of_dir(cache_dir):
                continue

            pi = meta[node]["pi"]
            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                result = (p, avg)
                results_size[pi].append(result)

    # Remove data below a threshold
    for pi, data in results_size.copy().items():
        trend = [size for _, size in data]
        if max(trend) <= threshold:
            del results_size[pi]
        else:
            results_size[pi] = sorted(data, key=lambda x: x[0])

    # Bin the remaining data
    if bins is not None:
        left_endpoint = bins[0]
        percentiles = np.percentile(list(results_size.keys()), bins[1:])
        bin_means = defaultdict(list)
        for i, bin_edge in enumerate(percentiles):
            bin_values = []
            for pi in results_size.keys():
                if left_endpoint < pi <= bin_edge:
                    bin_values.extend(results_size[pi])

            bin_means[i + 1] = average_across_infection_probability(bin_values)
            left_endpoint = bin_edge
        results_size = bin_means

    length_of_results = len(results_size.keys())

    colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
    for pi, data in sorted(results_size.items(), key=lambda x: x[0]):
        data = sorted(data, key=lambda x: x[0])
        c = next(colors)

        ax1.scatter(*zip(*data),
                    color=c,
                    label='{0}'.format(int(pi * 10)),
                    edgecolor='w',
                    clip_on=False,
                    zorder=1,
                    s=28)

        # Fit a logistic curve to this
        x = [
            p for (p, size) in data
            if not np.isnan(size) and not np.isinf(size)
        ]
        y = [
            size for (p, size) in data
            if not np.isnan(size) and not np.isinf(size)
        ]
        popt, pcov = curve_fit(curve,
                               np.array(x),
                               np.array(y),
                               bounds=([0., -150., -5.], [1., 0., 5.]))
        x_fine = np.arange(0.0, 1.01, 0.01)
        y = curve(x_fine, *popt)
        ax1.plot(x_fine, y, color=c)

        r = -2.7
        k = 0.91

        def scale_x(x, d):
            return (1.0 * x) / (-1.0 * math.log(1.0 - d))

        x = []
        y = []
        for x_i, y_i in data:
            if scale_x(x_i, pi * (1.0 / 10.0)) > 0:
                x.append(scale_x(x_i, pi * (1.0 / 10.0)))
                y.append(y_i)

        # Plot only the integer deciles 1-9 on ax2, with ordinal suffixes
        if pi in range(1, 10):
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(int(pi), 'th')
            ax2.scatter(x,
                        y,
                        color=c,
                        label='{0}{1} Decile'.format(int(pi), suffix),
                        edgecolor='w',
                        clip_on=False,
                        zorder=1,
                        s=28)

    # Plot the generic collapse curve defined by the fixed r and k above
    def scale_y(scaled_x):
        return 1.0 / (1.0 + math.exp(r * (k + math.log(scaled_x))))

    x_total = np.arange(0.01, 10.01, 0.01)
    ax2.plot(x_total, [scale_y(i) for i in x_total],
             color='black',
             label="Generic")

    ax1.tick_params(labelsize=12)
    ax2.tick_params(labelsize=12)
    ax1.set_ylim(0, 1.)
    ax2.set_ylim(0, 1.)
    ax2.set_xscale("log")
    ax2.set_xlim(0.04, 10)
    ax1.set_xlim(0, 1)

    ax1.set_xlabel(r'Transmission Probability, $p$',
                   fontsize=plot_utils.LABEL_SIZE)
    ax2.set_xlabel(r'Effective Transmission Probability, $p^{*}$',
                   fontsize=plot_utils.LABEL_SIZE)
    ax1.set_ylabel(r'Epidemic Size, $\frac{Y}{N}$',
                   fontsize=plot_utils.LABEL_SIZE)

    plot_utils.finalize(ax1)
    plot_utils.finalize(ax2)

    plt.legend(loc='center left',
               bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               scatterpoints=1,
               frameon=False)
    plt.savefig('results/infectious-size-results-of-ALL-SI.eps',
                bbox_inches='tight',
                format='eps',
                dpi=1000)
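plot_size_infection_probability also calls an average_across_infection_probability helper that is not included. Judging from how the binned (p, size) pairs are consumed afterwards, a reasonable sketch (assumed, not the project's actual code) collapses them to one mean size per transmission probability:

import numpy as np
from collections import defaultdict


# Assumed helper: average epidemic sizes that share a transmission probability p.
def average_across_infection_probability(pairs):
    by_p = defaultdict(list)
    for p, size in pairs:
        by_p[p].append(size)
    return sorted((p, np.average(sizes)) for p, sizes in by_p.items())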
Example #5
def plot_si_prestige_length(cache_dirs, ylim=(0, 5)):
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)

    (title, cache_dir) = cache_dirs
    cache = pickle.load(open(cache_dir, 'rb'))
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)
    results_length = defaultdict(list)
    for p in cache["length"].keys():
        for node, lengths in cache["length"][p].items():
            if node == bad_node_of_dir(cache_dir):
                continue

            avg = np.average(lengths)
            y = normalize(graph, node, avg)
            if not np.isnan(avg) and not np.isinf(avg) and not np.isnan(y):
                result = (meta[node]["pi"], y)
                results_length[p].append(result)

        results_length[p] = sorted(results_length[p], key=lambda x: x[0])

    for ratio, data in results_length.copy().items():
        avg_by_prestige = defaultdict(list)
        for pi, length in data:
            avg_by_prestige[pi].append(length)

        results_length[ratio] = [(pi, np.average(lengths))
                                 for pi, lengths in avg_by_prestige.items()]
        results_length[ratio] = sorted(results_length[ratio],
                                       key=lambda x: x[0])

    filtered = sorted(cache["length"].keys())[1::2]
    length_of_results = len(filtered)

    colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
    markers = Line2D.filled_markers
    count = -1
    for p, data in sorted(results_length.items(), key=lambda x: x[0]):
        if p not in filtered:
            continue
        c = next(colors)
        count += 1
        m = markers[count]
        ax.scatter(*zip(*data),
                   color=c,
                   label='{0:.2f}'.format(p),
                   s=28,
                   marker=m,
                   edgecolor='w',
                   clip_on=False,
                   zorder=1)

        x = np.array([
            pi for (pi, length) in data
            if not np.isnan(length) and not np.isinf(length)
        ])
        max_pi = max(x)
        y = np.array([
            length for (pi, length) in data
            if not np.isnan(length) and not np.isinf(length)
        ])

        # Fit a linear curve to this
        # regr = LinearRegression()
        # regr.fit(x.reshape(-1, 1), y.reshape(-1, 1))
        # interval = np.array([min(x), max(x)])
        # ax.plot(interval, interval*regr.coef_[0] + regr.intercept_, color=c)

        # Fit a LOWESS curve to this
        lowess = sm.nonparametric.lowess
        z = lowess(y, x, return_sorted=False)
        ax.plot(x, z, color=c)

    ax.set_xlim(0, max_pi)
    ax.tick_params(labelsize=12)
    ax.set_xlabel(r'University Prestige, $\pi$',
                  fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Normalized Epidemic Length, $\frac{L}{\ell}$',
                  fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)

    plt.ylim(ylim)
    plt.legend(loc='center left',
               bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title='Transmission\nProbability, $p$',
               frameon=False,
               scatterpoints=1)
    plt.savefig('results/length-results-of-ALL-SI.eps',
                bbox_inches='tight',
                format='eps',
                dpi=1000)
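The normalize(graph, node, avg) helper used above is likewise undefined here. Since the y-axis is labeled "Normalized Epidemic Length, L/ℓ", a plausible guess (assumed) divides the raw epidemic length by the seed node's average shortest-path length, mirroring the computation in plot_centrality:

import networkx as nx
import numpy as np


# Assumed helper: normalize an epidemic length by the seed's mean path length.
def normalize(graph, node, length):
    path_lengths = list(
        nx.single_source_shortest_path_length(graph, node).values())
    if not path_lengths:
        return float('nan')
    return length / np.nanmean(path_lengths)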
Example #6
def plot_sis_or_sir_prestige_length(cache_dirs, epidemic_type, ylim=(0, 10)):
    fig, axarray = plt.subplots(1,
                                len(cache_dirs),
                                figsize=(6.9 * 2, 5.0),
                                sharey=True)
    for i, ax in enumerate(axarray):
        (title, cache_dir) = cache_dirs[i]
        print("title: {0}".format(title))
        cache = pickle.load(open(cache_dir, 'rb'))
        meta = meta_of_dir(cache_dir)
        graph = graph_of_dir(cache_dir)
        results_length = defaultdict(list)
        for (p, r) in cache["length"].keys():
            if r == 0.0:
                continue  # can't divide by zero below. is this the right thing to do?
            for node, lengths in cache["length"][p, r].items():
                #print(np.average(lengths))
                if node is bad_node_of_dir(cache_dir):
                    continue
                result = (meta[node]["pi"],
                          normalize(graph, node, np.average(lengths)))
                results_length[p / r].append(result)

        # Different (p, r) pairs can give the same p/r ratio; average their results
        for ratio, data in results_length.copy().items():
            avg_by_prestige = defaultdict(list)
            for pi, length in data:
                avg_by_prestige[pi].append(length)

            results_length[ratio] = [
                (pi, np.average(lengths))
                for pi, lengths in avg_by_prestige.items()
            ]
            results_length[ratio] = sorted(results_length[ratio],
                                           key=lambda x: x[0])

        filtered = ["1.0", "2.0", "3.0", "4.0", "5.0"]
        length_of_results = len(filtered)

        colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
        markers = Line2D.filled_markers
        count = -1
        for ratio, data in sorted(results_length.items(), key=lambda x: x[0]):
            if "%.1f" % ratio not in filtered:
                continue
            c = next(colors)
            count += 1
            ax.scatter(*zip(*data),
                       color=c,
                       label='{0:.2f}'.format(ratio),
                       marker=markers[count],
                       edgecolor='w',
                       clip_on=False,
                       zorder=1,
                       s=28)

            # fit a linear curve to this
            x = np.array([
                pi for (pi, length) in data
                if not np.isnan(length) and not np.isinf(length)
            ])
            max_pi = max(x)
            y = np.array([
                length for (pi, length) in data
                if not np.isnan(length) and not np.isinf(length)
            ])

            regr = LinearRegression()
            regr.fit(x.reshape(-1, 1), y.reshape(-1, 1))
            interval = np.array([min(x), max(x)])
            print("infection probability: {0}\tcurve_fit: {1}".format(
                ratio, [regr.coef_[0], regr.intercept_]))
            ax.plot(interval,
                    interval * regr.coef_[0] + regr.intercept_,
                    color=c)

        ax.set_xlim(0, max_pi)
        #ax.set_title(title, y=1.05, fontsize=16)
        ax.tick_params(labelsize=12)
        if i == 0:
            ax.set_xlabel(r'University Prestige, $\pi$',
                          fontsize=plot_utils.LABEL_SIZE)
            ax.set_ylabel(r'Normalized Epidemic Length, $L$',
                          fontsize=plot_utils.LABEL_SIZE)
        plot_utils.finalize(ax)

    plt.legend(loc='center left',
               bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title=r'$p/r$',
               scatterpoints=1,
               frameon=False)
    plt.ylim(ylim)
    plt.savefig(
        'results/test/length-results-of-ALL-{0}.eps'.format(epidemic_type),
        bbox_inches='tight',
        format='eps',
        dpi=1000)
    plt.clf()
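The pickled caches and the *_of_dir helpers also come from the surrounding project. The lookups above imply the layout sketched below (inferred from usage, with a tiny illustrative value): meta_of_dir returns per-node metadata with a 'pi' prestige rank, graph_of_dir returns the hiring network, and bad_node_of_dir names a vertex to exclude from the plots.

# Assumed cache layout, inferred from the dictionary lookups in the examples:
#   SI runs:       cache["size"][p][node]        -> list of epidemic sizes
#                  cache["length"][p][node]      -> list of epidemic lengths
#   SIS/SIR runs:  cache["size"][(p, r)][node]   -> list of epidemic sizes
#                  cache["length"][(p, r)][node] -> list of epidemic lengths
example_cache = {
    "size": {(0.1, 0.05): {0: [0.42, 0.40], 1: [0.12]}},
    "length": {(0.1, 0.05): {0: [3.0, 4.0], 1: [1.0]}},
}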
Example #7
def plot_sis_or_sir_prestige_size(cache_dirs, epidemic_type, ylim=(0, 1)):
    fig, axarray = plt.subplots(1,
                                len(cache_dirs),
                                figsize=(6.9 * 2, 5.0),
                                sharey=True)
    for i, ax in enumerate(axarray):
        (title, cache_dir) = cache_dirs[i]
        print("title: {0}".format(title))
        cache = pickle.load(open(cache_dir, 'rb'))
        meta = meta_of_dir(cache_dir)
        graph = graph_of_dir(cache_dir)
        results_size = defaultdict(list)
        for (p, r) in cache["size"].keys():
            if r == 0.0:
                continue  # can't divide by zero below. is this the right thing to do?
            for node, sizes in cache["size"][p, r].items():
                if node is bad_node_of_dir(cache_dir):
                    continue
                result = (meta[node]["pi"], np.average(sizes))
                results_size[p / r].append(result)

        # Different (p, r) pairs can give the same p/r ratio; average their results
        for ratio, data in results_size.copy().items():
            avg_by_prestige = defaultdict(list)
            for pi, size in data:
                avg_by_prestige[pi].append(size)

            results_size[ratio] = [(pi, np.average(sizes))
                                   for pi, sizes in avg_by_prestige.items()]
            results_size[ratio] = sorted(results_size[ratio],
                                         key=lambda x: x[0])

        filtered = ["1.0", "2.0", "3.0", "4.0", "5.0"]
        length_of_results = len(filtered)

        colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
        markers = Line2D.filled_markers
        count = -1
        for ratio, data in sorted(results_size.items(), key=lambda x: x[0]):
            if "%.1f" % ratio not in filtered:
                continue
            c = next(colors)
            count += 1
            m = markers[count]
            ax.scatter(*zip(*data),
                       color=c,
                       label='{0:.2f}'.format(ratio),
                       marker=m,
                       edgecolor='w',
                       clip_on=False,
                       zorder=1,
                       s=28)

            x = [
                pi for (pi, length) in data
                if not np.isnan(length) and not np.isinf(length)
            ]
            max_pi = max(x)
            if ratio > 0:
                # fit a logistic curve to this
                y = [
                    length for (pi, length) in data
                    if not np.isnan(length) and not np.isinf(length)
                ]

                popt, pcov = curve_fit(curve,
                                       np.array(x),
                                       np.array(y),
                                       bounds=(0., [1., 2., 200.]))
                print("infection probability: {0}\tcurve_fit: {1}".format(
                    ratio, popt))
                y = curve(x, *popt)

                ax.plot(x, y, color=c)
            #ax.plot(*zip(*data), color=next(colors), label='p/r = {0:.2f}'.format(ratio), marker = 'o')

        ax.set_xlim(0, max_pi)
        #ax.set_title(title, y=1.05, fontsize=16)
        ax.tick_params(labelsize=12)
        if i == 0:
            ax.set_xlabel(r'University Prestige, $\pi$',
                          fontsize=plot_utils.LABEL_SIZE)
            ax.set_ylabel(r'Epidemic Size, $S$',
                          fontsize=plot_utils.LABEL_SIZE)
        plot_utils.finalize(ax)

    plt.legend(loc='center left',
               bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title=r'$p/r$',
               scatterpoints=1,
               frameon=False)
    plt.ylim(ylim)
    plt.savefig(
        'results/test/size-results-of-ALL-{}.eps'.format(epidemic_type),
        bbox_inches='tight',
        format='eps',
        dpi=1000)
    plt.clf()
Example #8
def plot_si_prestige_size(cache_dirs):
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)
    (title, cache_dir) = cache_dirs
    print("title: {0}".format(title))
    cache = pickle.load(open(cache_dir, 'rb'))
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node_of_dir(cache_dir):
                continue

            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                result = (meta[node]["pi"], avg)
                results_size[p].append(result)

        results_size[p] = sorted(results_size[p], key=lambda x: x[0])

    filtered = sorted(cache["size"].keys())[1::2]
    length_of_results = len(filtered)

    colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
    markers = Line2D.filled_markers
    count = -1
    for p, data in sorted(results_size.items(), key=lambda x: x[0]):
        if p not in filtered:
            continue
        c = next(colors)
        count += 1
        m = markers[count]
        ax.scatter(*zip(*data),
                   color=c,
                   label='{0:.2f}'.format(p),
                   s=28,
                   marker=m,
                   edgecolor='w',
                   clip_on=False,
                   zorder=1)

        x = [
            pi for (pi, length) in data
            if not np.isnan(length) and not np.isinf(length)
        ]
        if p == 0.1:
            # Drop in epidemic size (percentage points) every 10 schools by prestige
            prev = data[0][1]
            diffs = []
            for (i, row) in enumerate(data):
                if i in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
                    diffs.append(prev * 100.0 - row[1] * 100.0)
                    prev = row[1]

        max_pi = max(x)
        if p > 0:
            # fit a logistic curve to this
            y = [
                length for (pi, length) in data
                if not np.isnan(length) and not np.isinf(length)
            ]

            popt, pcov = curve_fit(curve,
                                   np.array(x),
                                   np.array(y),
                                   bounds=(0., [1., 2., 200.]),
                                   maxfev=100)
            y = curve(x, *popt)

            ax.plot(x, y, color=c)

    ax.set_xlim(0, max_pi)
    #ax.set_title(title, y=1.05, fontsize=16)
    ax.tick_params(labelsize=12)
    ax.set_xlabel(r'University Prestige, $\pi$',
                  fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Epidemic Size, $\frac{S}{N}$',
                  fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.ylim(0, 1)
    plt.legend(loc='center left',
               bbox_to_anchor=(1, 0.5),
               fontsize=plot_utils.LEGEND_SIZE,
               title='Transmission\nProbability, $p$',
               frameon=False,
               scatterpoints=1)
    #plt.tight_layout()
    plt.savefig('results/test/size-results-of-ALL-SI.eps',
                bbox_inches='tight',
                format='eps',
                dpi=1000)
Example #9
def plot_si_prestige_size(cache_dirs):
    fig, ax = plt.subplots(1, 1, figsize=(6.0, 4.0), sharey=True)

    (title, cache_dir) = cache_dirs
    cache = pickle.load(open(cache_dir, 'rb'))
    meta = meta_of_dir(cache_dir)
    graph = graph_of_dir(cache_dir)
    results_size = defaultdict(list)
    for p in cache["size"].keys():
        for node, sizes in cache["size"][p].items():
            if node == bad_node_of_dir(cache_dir):
                continue

            avg = np.average(sizes)
            if not np.isnan(avg) and not np.isinf(avg):
                result = (meta[node]["pi"], avg)
                results_size[p].append(result)

        results_size[p] = sorted(results_size[p], key=lambda x: x[0])

    filtered = sorted(cache["size"].keys())[1::2]
    length_of_results = len(filtered)

    colors = iter(cm.rainbow(np.linspace(0, 1, length_of_results)))
    markers = Line2D.filled_markers
    count = -1

    # Write the plotted (p, prestige, size) triples to a TSV alongside the cache
    with open(cache_dir.replace(".p", "_size.tsv"), 'w') as file:
        writer = csv.writer(file, delimiter='\t')
        writer.writerow(["infection_prob", "prestige", "size"])
        for p, data in sorted(results_size.items(), key=lambda x: x[0]):
            if p not in filtered:
                continue
            c = next(colors)
            count += 1
            m = markers[count]
            ax.scatter(*zip(*data), color=c, label='{0:.2f}'.format(p), s=28,
                       marker=m, edgecolor='w', clip_on=False, zorder=1)
            for (pi, size) in data:
                writer.writerow([p, pi, size])

            x = [pi for (pi, length) in data
                 if not np.isnan(length) and not np.isinf(length)]
            if p == 0.1:
                # Drop in epidemic size (percentage points) every 10 schools by prestige
                prev = data[0][1]
                diffs = []
                for (i, row) in enumerate(data):
                    if i in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
                        diffs.append(prev * 100.0 - row[1] * 100.0)
                        prev = row[1]

            max_pi = max(x)
            if p > 0:
                # Fit a logistic curve to this
                y = [length for (pi, length) in data if not np.isnan(length) and not np.isinf(length)]

                popt, pcov = curve_fit(curve, np.array(x), np.array(y), bounds=(0., [1., 2., 200.]), maxfev=100)
                y = curve(x, *popt)

                ax.plot(x, y, color=c)

    ax.set_xlim(0, max_pi)
    ax.tick_params(labelsize=12)
    ax.set_xlabel(r'University Prestige, $\pi$', fontsize=plot_utils.LABEL_SIZE)
    ax.set_ylabel(r'Epidemic Size, $\frac{Y}{N}$', fontsize=plot_utils.LABEL_SIZE)
    plot_utils.finalize(ax)
    plt.ylim(0, 1)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=plot_utils.LEGEND_SIZE, title='Transmission\nProbability, $p$', frameon=False, scatterpoints=1)
    plt.savefig('results/size-results-of-ALL-SI.eps', bbox_inches='tight', format='eps', dpi=1000)
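Each plotting function takes a (title, cache_dir) tuple, or a list of such tuples for the SIS/SIR variants. A hypothetical invocation (cache paths and department names here are illustrative, not from the source) might be:

if __name__ == '__main__':
    # Hypothetical cache locations; the real project supplies these paths.
    plot_si_prestige_size(("Computer Science", "cache/SI_computer_science.p"))
    plot_random_hop_size(("Computer Science", "cache/SI_random_hops.p"))
    plot_sis_or_sir_prestige_size(
        [("Computer Science", "cache/SIR_computer_science.p"),
         ("Business", "cache/SIR_business.p")],
        epidemic_type="SIR")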