def main():
    """CLI entry point: load a graph and preprocess its edge weights.

    Depending on the flags, either only reverse the edge weights,
    only normalize them globally, or run the full ``preprocess``
    pipeline, then save the resulting graph to ``--output_path``.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-g', '--graph', help='graph name')
    parser.add_argument('-s', '--graph_suffix', default='', help='')
    parser.add_argument('-w', '--weighted', action='store_true', help='')
    parser.add_argument('-r', '--only_reserve', action='store_true', help='')
    parser.add_argument('-n', '--only_normalize', action='store_true', help='')
    parser.add_argument('-o', '--output_path', help='')
    args = parser.parse_args()

    g = load_graph_by_name(args.graph, weighted=args.weighted,
                           suffix=args.graph_suffix)

    if args.only_reserve:
        # NOTE(review): the flag is spelled "reserve" but it reverses edge
        # weights; the flag name is kept for CLI backward compatibility.
        print('only_reserve')
        new_g = reverse_edge_weights(g)
    elif args.only_normalize:
        # fixed typo in the log message (was 'only normlize')
        print('only normalize')
        new_g = normalize_globally(g)
    else:
        new_g = preprocess(g)

    new_g.save(args.output_path)
    print('saved to {}'.format(args.output_path))
def main():
    """Attach uniformly random edge weights in [p_min, p_max] to a graph.

    Optionally converts the graph to a directed one (each undirected edge
    becomes a pair of opposite arcs), stores the weights and the bounds as
    graph properties, and saves the result to the output path.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-g', '--graph', help='graph name')
    parser.add_argument('-d', '--to_directed', action='store_true',
                        help='if make directed or not')
    parser.add_argument('--p_min', default=0.0, type=float,
                        help='lower bound for edge weight')
    parser.add_argument('--p_max', default=1.0, type=float,
                        help='upper bound for edge weight')
    parser.add_argument('-o', '--output')
    args = parser.parse_args()

    g = load_graph_by_name(args.graph)
    remove_self_loops(g)

    if args.to_directed:
        # add the reverse arc for every existing edge
        g.set_directed(True)
        for edge in list(g.edges()):
            g.add_edge(edge.target(), edge.source())

    # uniform random weights in [p_min, p_max]
    w_prop = g.new_edge_property('float')
    span = args.p_max - args.p_min
    w_prop.a = np.random.random(g.num_edges()) * span + args.p_min
    g.edge_properties["weights"] = w_prop

    # record the sampling bounds on the graph itself
    g.graph_properties['p_min'] = g.new_graph_property("float", args.p_min)
    g.graph_properties['p_max'] = g.new_graph_property("float", args.p_max)

    print(g.graph_properties['p_min'], args.p_min)
    print(g.graph_properties['p_max'], args.p_max)
    print('g.num_edges()', g.num_edges())

    output_path = args.output  # 'data/{}/graph_weighted.gt'.format(args.graph)
    g.save(output_path)
    print('dumped to {}'.format(output_path))
from matplotlib import pyplot as plt
from viz_helpers import lattice_node_pos
from minimum_steiner_tree import min_steiner_tree
from cascade_generator import si, observe_cascade
from eval_helpers import infection_precision_recall
from graph_helpers import remove_filters, load_graph_by_name
from inference import infer_infected_nodes
from query_selection import RandomQueryGenerator, OurQueryGenerator, PRQueryGenerator
from experiment import gen_input, one_round_experiment


# In[33]:

# Load the demo graph used throughout this notebook chunk.
graph_name = 'karate'
g = load_graph_by_name(graph_name)


# In[9]:

# NOTE(review): dead visualization code guarded by `if False`; it also
# references `gv`, which is not defined anywhere in this chunk — confirm
# against the full notebook before enabling.
if False:
    import graph_tool as gt
    pos = gt.draw.sfdp_layout(gv)
    vertex_text = g.new_vertex_property('string')
    for v in g.vertices():
        vertex_text[v] = str(v)
    gt.draw.graph_draw(gv, pos=pos, vertex_text=vertex_text)


# In[26]:

# NOTE(review): definition is truncated here — the body continues beyond
# this chunk.
def one_combined_round(g, n_queries, obs, c):
# coding: utf-8
import sys
import numpy as np
from graph_helpers import load_graph_by_name
from preprocess_graph import reverse_edge_weights

# Build a stochastic weighted version of the graph: each edge v -> u gets
# weight 1 / in_degree(u), so the incoming weights of every node sum to 1.
graph_name = sys.argv[1]
g = load_graph_by_name(graph_name, weighted=True)

weight = g.new_edge_property('float')
indeg = g.degree_property_map('in', weight=None)
for u in g.vertices():
    # every in-neighbour v -> u contributes equally
    for v in g.vertex(u).in_neighbours():
        weight[g.edge(v, u)] = 1 / indeg[u]

# sanity check: the weighted in-degree of every node must be exactly 1
weighted_indeg = g.degree_property_map('in', weight=weight)
assert np.all(np.isclose(weighted_indeg.a, 1)), 'maybe self-loops are not removed'

g.edge_properties['weights'] = weight
g.save('data/{}/graph_weighted_sto.gt'.format(graph_name))

# reversed-edge variant: there the weighted OUT-degree becomes 1
rev_g = reverse_edge_weights(g)
weighted_outdeg = g.degree_property_map(
    'out', weight=rev_g.edge_properties['weights'])
assert np.all(np.isclose(weighted_outdeg.a, 1))

rev_g.save('data/{}/graph_weighted_sto_rev.gt'.format(graph_name))
# Echo the parsed arguments for reproducibility.
print('-' * 10)
for name, value in args._get_kwargs():
    print("{}={}".format(name, value))

# both directories share the same outputs/<dirname>/<data_id>/<method> layout
out_fmt = 'outputs/{}/{}/{}'
inf_result_dirname = out_fmt.format(args.inf_dirname, args.data_id,
                                    args.sampling_method)
query_dirname = out_fmt.format(args.query_dirname, args.data_id,
                               args.sampling_method)

print('summarizing ', inf_result_dirname)

# if n_queries is too large, e.g, 100,
# we might have no hidden infected nodes left and average precision score is undefined
n_queries = args.n_queries

g = load_graph_by_name(args.graph_name)

query_dir_ids = [part.strip() for part in args.query_dir_ids.split(',')]

# legend labels default to the directory ids when not given explicitly
if args.legend_labels is not None:
    labels = [part.strip() for part in args.legend_labels.split(',')]
else:
    labels = query_dir_ids
print('query_dir_ids:', query_dir_ids)

pkl_dir = ('eval_result/{}'.format(args.eval_method)
           if args.eval_with_mask
           else 'eval_result/{}-no-mask'.format(args.eval_method))
print('pkl dir', pkl_dir)
args = parser.parse_args()

# Echo the parsed arguments.
print("Args:")
print('-' * 10)
for k, v in args._get_kwargs():
    print("{}={}".format(k, v))

graph_name = args.graph
suffix = args.graph_suffix
n_runs = args.n_runs
q = args.obs_fraction  # observation fraction (per the argument's name)
observation_method = args.observation_method
min_size = args.min_size
max_size = args.max_size

# the raw graph and its variant selected by `suffix`
# (presumably a normalized-weight version; confirm against the caller)
g = load_graph_by_name(graph_name, weighted=True)
norm_g = load_graph_by_name(graph_name, weighted=True, suffix=suffix)
print('g.num_edges()', g.num_edges())
print('norm_g.num_edges()', norm_g.num_edges())

result = {}
# if False:
for eps in [0.0, 0.5]:
    # NOTE(review): chunk is truncated here — the Parallel(...) call
    # continues beyond this view.
    rows = Parallel(n_jobs=-1)(
        delayed(one_run)(g, norm_g, q, eps, 'pagerank', min_size,
# query-selection methods that require a tree structure
METHODS_WANT_TREE = {'leaves', 'bfs-head', 'bfs-tail'}

args = parser.parse_args()

# Echo the parsed arguments.
print("Args:")
print('-' * 10)
for k, v in args._get_kwargs():
    print("{}={}".format(k, v))

graph_name = args.graph

if not args.use_edge_weights:
    print('uniform edge weight')
    g = load_graph_by_name(graph_name, weighted=False,
                           suffix=args.graph_suffix)
    # a single infection probability shared by all edges
    # (presumably a float — confirm against the parser definition)
    p = args.infection_proba
else:
    print('non-uniform edge weight')
    g = load_graph_by_name(graph_name, weighted=True,
                           suffix=args.graph_suffix)
    # per-edge probabilities stored as an edge property map
    p = g.edge_properties['weights']

print('p=', p)
# BUG FIX: in the uniform branch `p` is a scalar with no `.a` attribute,
# so the unconditional `print('p.a=', p.a)` raised AttributeError.
# Only dump the underlying array when it exists (no-op for property maps).
if hasattr(p, 'a'):
    print('p.a=', p.a)

# root_sampler = build_out_degree_root_sampler(g)
root_sampler = lambda: None
# root_sampler = lambda: 45

d = args.output_dir
from matplotlib import pyplot as plt
from graph_helpers import load_graph_by_name

# Collect and print evaluation results for one graph across several
# root-sampling methods and observation fractions.
graph_name = 'grqc'
suffix = 's0.03'
aspects = ['roc', 'ap', 'precision', 'recall', 'f1']
# observation fractions, kept as strings because they go into file names
qs = ['0.1', '0.25', '0.5', '0.75']
eval_metric = 'mean'

for aspect in aspects:
    root_sampling_method = 'random_root'
    g = load_graph_by_name(graph_name, weighted=True, suffix='_' + suffix)
    methods = [
        'pagerank-eps0.0',
        'pagerank-eps0.5',
        'pagerank-eps1.0',
        'random_root',
        'true root'
    ]
    columns_to_plot = []
    for q in qs:
        result_path = 'eval_result/{}-{}-q{}-by_root_sampling_methods.pkl'.format(
            graph_name, suffix, q)
        row = pkl.load(open(result_path, 'rb'))
        print('q={}'.format(q))
        print('-' * 10)
        print(row[aspect][root_sampling_method])
        # NOTE(review): chunk is truncated here — this append call
        # continues beyond this view.
        columns_to_plot.append(
from collections import Counter

# Inspect a directory of generated cascades: print observation sizes,
# cascade sizes and the frequency of cascade roots.
graph = 'grqc'
model = 'ic'
# suffix = '_tmp'
# cascade_fraction = 0
suffix = ''
cascade_fraction = 0.25
obs_frac = "0.5"
cascade_dir = 'cascade'

dirname = '{}/{}-m{}-s{}-o{}-omuniform/*'.format(cascade_dir, graph, model,
                                                 cascade_fraction, obs_frac)
print(dirname)

g = load_graph_by_name(graph, weighted=True, suffix=suffix)

gprop = g.graph_properties
if 'p_min' in gprop:
    p_min, p_max = gprop['p_min'], gprop['p_max']
    print('p_min={}, p_max={}'.format(p_min, p_max))
else:
    print('external weight initialization')

# FIX: each pickle file was previously opened and fully loaded TWICE
# (one glob + pkl.load pass per list comprehension) and the file handles
# were never closed.  Load every file exactly once, via a context manager.
loaded = []
for path in glob(dirname):
    with open(path, 'rb') as f:
        loaded.append(pkl.load(f))
os = [pair[0] for pair in loaded]  # observations; name shadows stdlib `os`
cs = [pair[1] for pair in loaded]  # cascades

obs_sizes = [len(o) for o in os]
c_sizes = [len(infected_nodes(c)) for c in cs]
roots = list(map(cascade_source, cs))

print('roots freq:')
print(Counter(roots).most_common(10))
def g():
    """Return the 'grqc' graph, loaded with its edge weights."""
    graph = load_graph_by_name('grqc', weighted=True)
    return graph
help='number of samples')
# NOTE(review): this chunk starts mid-call — the add_argument whose
# keyword closes above began before this view.
parser.add_argument('-j', '--n_jobs', default=-1, type=int,
                    help='number of parallel jobs')
args = parser.parse_args()

# pin OpenMP to one thread; parallelism comes from joblib's Parallel below
openmp_set_num_threads(1)

graph_name = args.graph
sampling_method = args.sampling_method
n_samples = args.n_samples

# the edge-reversed weighted graph (suffix '_reversed')
g_rev = load_graph_by_name(graph_name, weighted=True, suffix='_reversed')

cs = load_cascades(
    'cascade-weighted/{}-mic-s0.02-oleaves/'.format(graph_name))

# run the (re)sampling experiment on every cascade in parallel;
# each call yields a pair of (average-precision record, precision record)
tuples_of_records = Parallel(n_jobs=args.n_jobs)(
    delayed(run_with_or_without_resampling)(g_rev, cid, c, X, n_samples,
                                            sampling_method)
    for cid, (X, c) in tqdm(cs, total=96))

ap_records, p_records = zip(*tuples_of_records)

ap_df = pd.DataFrame.from_records(ap_records)
print('ap score:')
print(ap_df.describe())

pk_df = pd.DataFrame.from_records(p_records)