def incremental_simulation(g, c, p, return_new_edges=False):
    """Grow an existing cascade by spreading further from its infected nodes.

    A breadth-first traversal is started from every node selected by
    `infected_nodes(c)`. Each out-edge `e` of a dequeued node transmits the
    infection when a fresh uniform random draw is `<= p[e]` and the target
    has not been reached yet.

    Args:
        g: graph object providing `vertex(i)` whose result has `out_edges()`.
        c: array-like indexed by node id holding infection times; it is
            copied, never modified.
        p: mapping from edge to activation probability.
        return_new_edges: when true, also return the `(u, v)` pairs along
            which new infections happened.

    Returns:
        `new_c`, a copy of `c` in which every newly infected node `v` gets
        the infection time of its infector plus one; or `(new_c, new_edges)`
        when `return_new_edges` is true.
    """
    from collections import deque

    seeds = list(infected_nodes(c))
    visited = {int(v) for v in seeds}
    new_c = copy(c)

    if return_new_edges:
        new_edges = []

    # deque gives O(1) pops from the left; list.pop(0) is O(n)
    queue = deque(seeds)
    while queue:
        u = queue.popleft()
        for e in g.vertex(u).out_edges():
            v = int(e.target())
            # The random draw happens for every edge (before the visited
            # check) so the consumed random stream stays the same as before.
            if np.random.random() <= p[e] and v not in visited:  # active
                if return_new_edges:
                    new_edges.append((u, v))
                # Use new_c, not c: `u` may itself be newly infected, in which
                # case c[u] still holds the "uninfected" value.
                new_c[v] = new_c[u] + 1
                visited.add(v)
                queue.append(v)

    if return_new_edges:
        return (new_c, new_edges)
    return new_c
    def plot(self,
             c,
             X,
             probas,
             interception_func=None,
             setting_kwargs={},
             uninfected_small=False,
             lighten_obs=True,
             lighten_prediction=False,
             highlight_missing_infection=False,
             color_mapper=None,
             **kwargs):
        """Draw `self.g` at `self.pos` using heat-map settings built from `probas`.

        The settings come from `heatmap_plot_setting(self.g, c, X, probas, ...)`
        and are then adjusted by the flags below before being forwarded to
        `visualize`.

        c: cascade, consumed by `infected_nodes` / `cascade_source`
        X: observed (terminal) nodes
        probas: per-node values used for coloring
        interception_func: optional callable receiving the settings dict right
            before drawing
        setting_kwargs: extra keyword arguments for `heatmap_plot_setting`
        uninfected_small: scale the size entry of `X` by 1.5 and give the
            uninfected nodes the default size divided by 1.5
        lighten_obs: write 0 into the color info at `X`
        lighten_prediction: color values equal to 1 become 0.5, then the
            cascade source is set to 1
        highlight_missing_infection: currently only computes a set that is
            not used afterwards
        kwargs: forwarded to `visualize`
        """
        style = heatmap_plot_setting(
            self.g, c, X, probas, color_mapper=color_mapper, **setting_kwargs)

        if uninfected_small:
            healthy = set(np.arange(len(c))) - set(infected_nodes(c))
            sizes = style['node_size_info']
            terminals = tuple(X)

            # terminals are drawn larger
            sizes[terminals] = sizes[terminals] * 1.5
            # nodes outside the cascade are drawn smaller
            sizes[tuple(healthy)] = sizes['default'] / 1.5

        if lighten_obs:
            # NOTE(review): this writes in place into whatever object
            # heatmap_plot_setting stored as node_color_info -- confirm that
            # mutating it is acceptable for callers.
            style['node_color_info'][X] = 0

        if lighten_prediction:
            colors = style['node_color_info']
            root = cascade_source(c)
            colors[colors == 1] = 0.5
            colors[root] = 1

        if highlight_missing_infection:
            # infected nodes that are neither observed nor predicted with
            # value 1; the result is not used further in this method
            unobserved = set(infected_nodes(c)) - set(X)
            missing = unobserved - set((probas == 1).nonzero()[0])

        if interception_func is not None:
            interception_func(style)
        visualize(self.g, self.pos, **style, **kwargs)
def ic(g, p, source=None, stop_fraction=1.0, return_tree_edges=False):
    """
    graph_tool version of simulating a cascade.

    A graph is sampled with `sample_graph_by_p(g, p)` and infection times are
    taken from `get_infection_time` on that sample, starting at `source`.

    g: the graph
    p: edge probabilities passed to `sample_graph_by_p`
    source: start node; drawn uniformly from all node ids when None
    stop_fraction: determines how large the snapshot is. The cascade is
        truncated to `int(stop_fraction * g.num_vertices())` nodes.
    return_tree_edges: also collect the infection tree edges

    returns `(source, times, tree_edges)` where `times` is an np.ndarray on
    vertices holding the infection time (uninfected nodes have -1) and
    `tree_edges` is None unless `return_tree_edges` is true.

    raises CascadeTooSmall when fewer nodes than the requested snapshot size
    were infected.
    """
    if source is None:
        source = random.choice(np.arange(g.num_vertices(), dtype=int))
    gv = sample_graph_by_p(g, p)

    # cheap pass first (no edge collection) so undersized cascades are
    # rejected early
    times = get_infection_time(gv, source, return_edges=False)
    size = len(infected_nodes(times))

    min_size = int(stop_fraction * g.num_vertices())

    if size < min_size:
        # size does not fit, early stopping to save time
        raise CascadeTooSmall('{} < {}'.format(size, min_size))

    # The times computed above are reused; the traversal is only repeated
    # when the tree edges are actually requested.
    tree_edges = None
    if return_tree_edges:
        times, tree_edges = get_infection_time(gv, source, return_edges=True)

    # truncate the infection to fit size: uninfected nodes are temporarily
    # pushed past the latest infection time so they sort last, then
    # everything after the first `min_size` nodes is marked uninfected
    times[times == -1] = (times.max() + 1)
    uninfected = times.argsort()[min_size:]
    times[uninfected] = -1

    if tree_edges is not None:
        # keep only edges whose both endpoints survived the truncation
        inf_nodes = set(infected_nodes(times))
        tree_edges = [
            e for e in tree_edges if e[0] in inf_nodes and e[1] in inf_nodes
        ]

    return source, times, tree_edges
def incremental_simulation(g, c, p, num_nodes, return_new_edges=False):
    """incrementally add edges to given cascade

    A graph is sampled with `sample_graph_by_p(g, p)`; every node that shares
    a component (per `label_components`) with an already infected node is
    marked as infected as well.

    g: the graph (possibly a vertex-filtered view)
    c: the current cascade, read through `infected_nodes`
    p: edge probabilities passed to `sample_graph_by_p`
    num_nodes: length of the returned array. It is passed because vfilt
        might be passed: with a vertex filter, `g.num_vertices()` does not
        have to equal the full number of nodes.
    return_new_edges: not supported anymore, raises when true

    returns an array of length `num_nodes` with 1 for infected nodes and -1
    for all others.
    """
    if return_new_edges:
        # fail before doing any sampling work
        raise Exception("`return_new_edges` not supported anymore")

    gv = sample_graph_by_p(g, p)

    seeds = infected_nodes(c)
    new_infected_nodes = set(seeds)
    comp = label_components(gv)[0]
    covered_cids = set()
    for v in seeds:
        cid = comp[v]
        if cid not in covered_cids:
            # NOTE(review): `comp.a` is scanned as a whole; confirm how
            # filtered-out vertices are labelled when `g` carries a vfilt.
            new_infected_nodes |= set((comp.a == cid).nonzero()[0])
            covered_cids.add(cid)

    # sized by num_nodes (not g.num_vertices()) so node ids stay valid
    # indices when g is filtered
    new_c = np.ones(num_nodes) * (-1)
    new_c[list(new_infected_nodes)] = 1
    return new_c
Пример #5
0
    def add_incremental_edges(self, tree_nodes):
        """Expand a set of tree nodes through one `incremental_simulation` run.

        tree_nodes: collection of node ids; a GraphView is rejected with
            TypeError.

        Returns the set of nodes reported as infected by
        `incremental_simulation` when started from `tree_nodes`.
        """
        if isinstance(tree_nodes, GraphView):
            raise TypeError(
                'add_incremental_edges does not support GraphView yet. '
                'Please pass in a set of nodes')

        # pseudo cascade: tree nodes carry 1, all remaining nodes carry -1
        seed_cascade = np.full(self.num_nodes, -1.0)
        seed_cascade[list(tree_nodes)] = 1

        weights = get_edge_weights(self.g)
        assert weights is not None, 'for incremental edge addition, edge weight should be given'

        expanded = incremental_simulation(
            self.g,
            seed_cascade,
            weights,
            self.num_nodes,
            return_new_edges=False)
        return set(infected_nodes(expanded))
def get_infection_time(g, source, return_edges=False):
    """Infection times for the IC model, taken from `shortest_distance`.

    Each node's time is its distance from `source` as reported by
    `shortest_distance`; entries equal to MAXINT are replaced by -1.

    With `return_edges` set, a list of `(predecessor, node)` pairs for the
    infected nodes is returned alongside the times.
    """
    dist, pred_map = shortest_distance(g, source=source, pred_map=True)
    infection_time = np.array(dist.a)
    infection_time[infection_time == MAXINT] = -1

    if not return_edges:
        return infection_time

    edges = []
    for node in infected_nodes(infection_time):
        parent = pred_map[node]
        # skip nodes without a valid predecessor and self-references
        if parent < 0 or parent == node:
            continue
        edges.append((parent, node))
    return infection_time, edges
def run_with_or_without_resampling(g, cid, c, X, n_samples, sampling_method):
    """Score infection-probability estimates for three sampler configurations.

    For each configuration ('P', 'P_new', 'no resampling') a TreeSamplePool is
    filled from the observations `X`, infection probabilities are estimated,
    and both average precision and precision-at-cascade-size are computed on
    the nodes that are not in `X`.

    Returns `(ap_ans, p_ans)`: two dicts keyed by configuration name, each
    with an additional 'cid' entry holding `cid`.
    """
    gi = from_gt(g, get_edge_weights(g))

    # ground truth: 1 for infected nodes, 0 otherwise
    y_true = np.zeros((len(c), ))
    y_true[infected_nodes(c)] = 1

    # evaluate only on nodes that were not observed
    observed = set(X)
    mask = np.array([(node not in observed) for node in range(len(c))])

    root_sampler = build_true_root_sampler(c)

    options = {
        'P': dict(with_resampling=True,
                  true_casacde_proba_func=cascade_probability_gt),
        'P_new': dict(with_resampling=True,
                      true_casacde_proba_func=ic_cascade_probability_gt),
        'no resampling': dict(with_resampling=False),
    }

    ap_ans = {}
    p_ans = {}
    for name, opt in options.items():
        pool = TreeSamplePool(
            g, n_samples, sampling_method, gi=gi, return_type='nodes', **opt)
        pool.fill(X, root_sampler=root_sampler)

        stats = TreeBasedStatistics(g, pool.samples)
        probas = infection_probability(g, X, pool, stats)

        ap_ans[name] = average_precision_score(y_true[mask], probas[mask])
        p_ans[name] = precision_at_cascade_size(y_true[mask], probas[mask])

    for answer in (ap_ans, p_ans):
        answer['cid'] = cid
    return ap_ans, p_ans
def heatmap_plot_setting(g, c, X, weight, **kwargs):
    """Plot settings for a heat map: uniform node sizes, colors from `weight`.

    Starts from `default_plot_setting(g, c, X, **kwargs)`, then sets the size
    entries for the observed nodes `X`, the hidden infections (infected nodes
    not in `X`) and 'default' to `10 * size_multiplier`, and replaces
    'node_color_info' by `weight` (the same object, not a copy).

    kwargs: forwarded to `default_plot_setting`; 'size_multiplier' (default
        1.0) is also read here.

    Returns the settings dict.
    """
    hidden_infs = set(infected_nodes(c)) - set(X)

    multiplier = kwargs.get('size_multiplier', 1.0)
    s = default_plot_setting(g, c, X, **kwargs)

    node_size = 10 * multiplier
    s['node_size_info'][tuple(X)] = node_size
    s['node_size_info'][tuple(hidden_infs)] = node_size
    s['node_size_info']['default'] = node_size

    s['node_color_info'] = weight
    return s
def default_plot_setting(g,
                         c,
                         X,
                         size_multiplier=1.0,
                         edge_width_multiplier=1.0,
                         deemphasize_hidden_infs=False):
    """Build the default color/shape/size settings for drawing a cascade.

    c: cascade, read through `cascade_source` and `infected_nodes`
    X: observed nodes
    size_multiplier: factor applied to every node size
    edge_width_multiplier: factor applied to the default edge pen width
    deemphasize_hidden_infs: when true, hidden infections (infected nodes not
        in `X`) get no dedicated color or size entry

    Keys of the node dicts are tuples of node ids or the string 'default'.
    `g` is not used by this function.
    """
    source = cascade_source(c)
    hidden_infs = set(infected_nodes(c)) - set(X)

    obs_key = tuple(X)
    source_key = (source, )
    hidden_key = tuple(hidden_infs)
    show_hidden = not deemphasize_hidden_infs

    # insertion order is kept exactly as before
    colors = OrderedDict()
    colors[obs_key] = COLOR_BLUE
    if show_hidden:
        colors[hidden_key] = COLOR_YELLOW
    colors[source_key] = COLOR_GREEN
    colors['default'] = COLOR_WHITE

    shapes = OrderedDict()
    shapes[obs_key] = SHAPE_SQUARE
    shapes['default'] = SHAPE_CIRCLE
    shapes[source_key] = SHAPE_PENTAGON

    sizes = OrderedDict()
    sizes[obs_key] = 15 * size_multiplier
    sizes[source_key] = 20 * size_multiplier
    if show_hidden:
        sizes[hidden_key] = 12.5 * size_multiplier
    sizes['default'] = 6 * size_multiplier

    setting = {}
    setting['node_color_info'] = colors
    setting['node_shape_info'] = shapes
    setting['node_size_info'] = sizes
    setting['edge_color_info'] = {'default': 'white'}
    setting['edge_pen_width_info'] = {'default': 2.0 * edge_width_multiplier}
    setting['node_text_info'] = {'default': ''}
    return setting
Пример #10
0
def accumulate_score(stuff, eval_func):
    """Evaluate stored infection probabilities against the true cascades.

    stuff: dict mapping a root sampling method to an iterable of rows; each
        row provides 'c' (cascade), 'obs' (observed nodes) and
        'st_naive_probas' (per-node scores).
    eval_func: callable `(y_true, y_score) -> score`.

    Returns a dict mapping each root sampling method to a list with one
    `{'random': ..., 'st_naive': ...}` score dict per row. 'random' is a
    uniformly random baseline. Scores are computed on the nodes that are not
    in 'obs'.
    """
    scores_by_root_sampling_method = {}
    for root_sampling_method, data in stuff.items():
        scores = []
        for row in tqdm(data):
            c, obs = row['c'], row['obs']
            n = len(c)

            y_true = np.zeros((n, ))
            y_true[infected_nodes(c)] = 1

            # set membership instead of scanning `obs` once per node
            observed = set(obs)
            mask = np.array([(i not in observed) for i in range(n)])

            # The baseline is sized by the cascade itself rather than by a
            # module-level graph; the boolean mask needs this length anyway.
            random_inf_p = np.random.random(n)

            # 'st_inc' (row['st_tree_inc_probas']) is not evaluated here
            candidates = (('random', random_inf_p),
                          ('st_naive', row['st_naive_probas']))
            scores.append({
                name: eval_func(y_true[mask], inf_probas[mask])
                for name, inf_probas in candidates
            })
        scores_by_root_sampling_method[root_sampling_method] = scores
    return scores_by_root_sampling_method
def heatmap_plot_setting(g, c, X, weight, color_mapper=None, **kwargs):
    """Plot settings for a heat map: uniform node sizes, colors from `weight`.

    Starts from `default_plot_setting(g, c, X, **kwargs)`, then sets the size
    entries for the observed nodes `X`, the hidden infections (infected nodes
    not in `X`) and 'default' to `10 * size_multiplier`.

    color_mapper: when None, 'node_color_info' becomes `weight` itself (the
        same object, not a copy). Otherwise it becomes a dict mapping `(n, )`
        to `color_mapper(weight[n])` for every position `n` in `weight`.
    kwargs: forwarded to `default_plot_setting`; 'size_multiplier' (default
        1.0) is also read here.

    Returns the settings dict.
    """
    hidden_infs = set(infected_nodes(c)) - set(X)

    multiplier = kwargs.get('size_multiplier', 1.0)
    s = default_plot_setting(g, c, X, **kwargs)

    node_size = 10 * multiplier
    s['node_size_info'][tuple(X)] = node_size
    s['node_size_info'][tuple(hidden_infs)] = node_size
    s['node_size_info']['default'] = node_size

    if color_mapper is None:
        s['node_color_info'] = weight
    else:
        s['node_color_info'] = {
            (n, ): color_mapper(w) for n, w in enumerate(weight)
        }

    return s
Пример #12
0
def test_gen_input(g, cascade_model, weighted, source):
    """Generate ten inputs with `gen_input` and sanity-check them.

    Checks that no two runs yield the same observation set and, for the SI
    model only, that the infected fraction stays at or below 0.11.
    """
    if weighted:
        p = g.edge_properties['weights']
    else:
        # unweighted case: the same probability on every edge
        p = g.new_edge_property('float')
        p.set_value(0.8)

    rows = []
    for _ in range(10):
        rows.append(
            gen_input(g, p=p, model=cascade_model, source=source, stop_fraction=0.1))

    # every pair of runs must differ in its observations
    # (may fail with low probability)
    for first, second in combinations(rows, 2):
        obs1, _c1 = first[:2]
        obs2, _c2 = second[:2]
        assert set(obs1) != set(obs2)

    # the cascade size check is only applicable for the SI model
    if cascade_model != 'si':
        return

    for r in rows:
        frac = len(infected_nodes(r[1])) / g.num_vertices()
        assert frac <= 0.11
def one_run(g,
            norm_g,
            q,
            eps,
            root_sampler_name,
            min_size,
            max_size,
            observation_method="uniform",
            with_inc=False):
    """Generate one IC cascade and estimate infection probabilities from it.

    g: graph whose 'weights' edge property supplies the edge probabilities
    norm_g: graph converted with `from_gt` for the Steiner tree sampler
    q, min_size, max_size, observation_method: forwarded to `gen_input`
    eps: forwarded to the pagerank root sampler
    root_sampler_name: 'pagerank', 'true' (always the real source), or
        anything else for a sampler that returns None
    with_inc: additionally expand every sampled tree through
        `incremental_simulation`

    Returns a dict with 'c', 'obs', 'st_naive_probas' and, when `with_inc` is
    true, 'st_tree_inc_probas'.
    """
    print("observation_method", observation_method)

    n_samples = 100
    p = g.edge_properties['weights']

    obs, c = gen_input(
        g,
        source=None,
        p=p,
        q=q,
        model='ic',
        observation_method=observation_method,
        min_size=min_size,
        max_size=max_size)

    print('cascade size', len(infected_nodes(c)))
    # the true source is the node whose value in c is 0
    source = np.nonzero(c == 0)[0][0]

    if root_sampler_name == 'pagerank':
        root_sampler = build_root_sampler_by_pagerank_score(g, obs, c, eps=eps)
    elif root_sampler_name == 'true':
        def root_sampler():
            return source
    else:
        def root_sampler():
            return None

    gi = from_gt(norm_g, weights=get_edge_weights(norm_g))

    def draw_tree_nodes():
        # one batch of Steiner tree samples; the root is drawn anew per batch
        return sample_steiner_trees(
            g,
            obs,
            root=root_sampler(),
            method='cut',
            n_samples=n_samples,
            gi=gi,
            return_tree_nodes=True)

    # method 2: vanilla steiner tree sampling
    naive_stat = TreeBasedStatistics(g, draw_tree_nodes())
    st_naive_probas = naive_stat.unconditional_proba()

    row = {'c': c, 'obs': obs, 'st_naive_probas': st_naive_probas}
    if not with_inc:
        return row

    # method 3: with incremental cascade simulation
    # NOTE(review): incremental_simulation is called without a `num_nodes`
    # argument here -- confirm this matches the version that is in scope.
    def expand(nodes):
        seed_c = np.ones(g.num_vertices()) * (-1)
        seed_c[list(nodes)] = 1
        grown_c = incremental_simulation(g, seed_c, p, return_new_edges=False)
        return infected_nodes(grown_c)

    new_tree_nodes = [expand(nodes) for nodes in draw_tree_nodes()]
    inc_stat = TreeBasedStatistics(g, new_tree_nodes)
    row['st_tree_inc_probas'] = inc_stat.unconditional_proba()
    return row
                                                 cascade_fraction, obs_frac)

# Summary statistics over the pickled (observation, cascade) pairs matched by
# the glob pattern `dirname`.
print(dirname)
g = load_graph_by_name(graph, weighted=True, suffix=suffix)

gprop = g.graph_properties
if 'p_min' in gprop:
    p_min, p_max = gprop['p_min'], gprop['p_max']
    print('p_min={}, p_max={}'.format(p_min, p_max))
else:
    print('external weight initialization')


def _load_pickle(path):
    """Load one pickled object and close the file afterwards."""
    # NOTE: pickle must only be used on trusted files
    with open(path, 'rb') as f:
        return pkl.load(f)


# each file is read once; element 0 is the observation, element 1 the cascade
_rows = [_load_pickle(p) for p in glob(dirname)]
# NOTE(review): `os` shadows the stdlib module name; the name is kept here in
# case code outside this section relies on it.
os = [r[0] for r in _rows]
cs = [r[1] for r in _rows]
obs_sizes = [len(o) for o in os]
c_sizes = [len(infected_nodes(c)) for c in cs]
roots = list(map(cascade_source, cs))
print('roots freq:')
print(Counter(roots).most_common(10))

# how often the exact same observation set occurs
obs_cnt = Counter([tuple(sorted(o)) for o in os])
print('top cascade freq:')
for _, c in obs_cnt.most_common(10):
    print('freq:', c)

print('cascade size describe:')
print(pd.Series(c_sizes).describe())
print('-' * 10)
print('fraction', np.mean(c_sizes) / g.num_vertices())

print('-' * 10)