def filter_edgelist(PARAMS, node_list):
    """Filter the raw edge list down to edges whose endpoints are both in
    node_list, remap kept node ids to compact indices 0..len(node_list)-1,
    and write the remapped edges to the sampled dataset file.

    PARAMS: dict with 'dataset' (input edge list) and 'sampled_dataset'
        (output) filenames, both relative to the module-level path_prefix.
    node_list: iterable of original node ids to keep; mapping order follows
        iteration order of node_list.
    Returns None; result is written to PARAMS['sampled_dataset'].
    """
    # Set gives O(1) membership tests instead of O(n) list scans per edge.
    node_set = set(node_list)
    # Map each kept node id to a compact index in iteration order.
    mapping = {node: idx for idx, node in enumerate(node_list)}
    display("filter_edgelist",
            "Created mapping for nodes for " + str(len(mapping)) + " nodes.")
    cedge = 0
    cmatch = 0
    out_lines = []
    # 'with' guarantees the files are closed even if parsing raises.
    with open(os.path.join(path_prefix, PARAMS['dataset']), "r") as ifp:
        for edge in ifp:
            cedge += 1
            tokens = edge.rstrip().split(" ")
            u = int(tokens[0])
            v = int(tokens[1])
            if u in node_set and v in node_set:
                out_lines.append(str(mapping[u]) + " " + str(mapping[v]) + "\n")
                cmatch += 1
    # Single join/write instead of quadratic string concatenation.
    with open(os.path.join(path_prefix, PARAMS['sampled_dataset']), "w") as ofp:
        ofp.write("".join(out_lines))
    display(
        "filter_edgelist",
        "The edge-list corresponding to filtered node-list with " +
        str(cedge) + " edges and " + str(cmatch) + " matching edges.")
    return
def sanitize_prob(Gcomplete):
    """Clamp every per-edge 'Pview' and 'Pshare' probability in Gcomplete
    to the valid range [0.0, 1.0].

    Both directions of each edge (u->v and v->u) are clamped. Values are
    only rewritten when out of range. Uses the networkx 1.x `G.node`
    attribute-dict API. Returns None; Gcomplete is modified in place.
    """
    for u, v in Gcomplete.edges():
        # Handle both orientations and both probability maps uniformly.
        for src, dst in ((u, v), (v, u)):
            for key in ('Pview', 'Pshare'):
                value = Gcomplete.node[src][key][dst]
                if value > 1.0:
                    Gcomplete.node[src][key][dst] = 1.0
                elif value < 0.0:
                    Gcomplete.node[src][key][dst] = 0.0
    display("sanitize_prob", "Sanitized Gcomplete.")
    return
def get_filtered_nodes(Goriginal, PARAMS):
    """Sample up to PARAMS['sample_size'] node ids from Goriginal.

    Sampling techniques (PARAMS['sampling_technique']):
      - "DegreeMin": walk nodes in ascending-degree order, starting at the
        first node whose degree equals the ceiled median degree.
      - "DegreeMax": take the highest-degree nodes.
    Returns a list of node ids (empty if the technique is unrecognized).
    """
    size = PARAMS['sample_size']
    sample_using = PARAMS['sampling_technique']
    node_list = []
    # networkx 1.x: degree(nbunch) returns a dict node -> degree.
    degrees = Goriginal.degree(list(Goriginal.node))
    node_deg_asc = sorted(degrees.items(), key=itg(1))
    node_deg_desc = sorted(degrees.items(), key=itg(1), reverse=True)
    deg_asc = [pair[1] for pair in node_deg_asc]
    node_desc = [pair[0] for pair in node_deg_desc]
    if sample_using == "DegreeMin":
        median = int(math.ceil(np.median(deg_asc)))
        # BUGFIX: the original did `start_pos = i[0]`, using the *node id*
        # of the first median-degree node as a positional index into the
        # sorted list. Use its position in the sorted order instead.
        start_pos = 0
        for pos, (node, deg) in enumerate(node_deg_asc):
            if deg == median:
                start_pos = pos
                break
        node_asc = [pair[0] for pair in node_deg_asc]
        node_list = node_asc[start_pos:][:size]
    elif sample_using == "DegreeMax":
        node_list = node_desc[:size]
    display(
        "get_filtered_nodes",
        "Sampling done using " + sample_using + "; node list size is " +
        str(len(node_list)) + ".")
    return node_list
def load_parameters_for_test_run():
    """Load all configured properties, overlay the defaults, and return the
    resulting parameter dict for a test run (ALLVAR/STATVAR are discarded)."""
    params, _allvar, _statvar = load_all_properties()
    params = load_default_properties(params)
    display("load_parameters_for_test_run", "Parameters loaded for test run.")
    return params
def get_points_of_intro(Gcomplete, no_of_points):
    """Pick no_of_points content-introduction points among Gcomplete's nodes.

    Each point is drawn independently via get_next_point_of_intro(), so
    duplicates are possible. Returns a list of length no_of_points.
    """
    display("get_points_of_intro",
            "No. of points of introduction: " + str(no_of_points))
    # Node count is loop-invariant (only the integer is passed on); hoist it
    # and build the list with a comprehension instead of repeated append.
    n_nodes = Gcomplete.number_of_nodes()
    points_of_intro = [get_next_point_of_intro(n_nodes)
                       for _ in range(no_of_points)]
    # display("get_points_of_intro", "Points of introduction: "+str(points_of_intro))
    return points_of_intro
def introduce_all_content(Gcomplete, PARAMS):
    """Introduce PARAMS['content_count'] pieces of content into Gcomplete and
    return the combined event forest (one flat list of event trees).

    Each piece gets a level drawn from PARAMS['content_levels'] via
    get_content_level(); IC.introduce_content() does the actual spread.
    """
    event_forest = []
    total = PARAMS['content_count']
    levels = PARAMS['content_levels']
    for idx in range(total):
        level = get_content_level(levels)
        event_forest.extend(IC.introduce_content(Gcomplete, level, PARAMS))
        display(
            "introduce_all_content",
            "Current content: " + str(idx + 1) + " out of " + str(total))
    return event_forest
def introduce_content(Gcomplete, content_level, parameters):
    """Introduce one piece of content at several points of Gcomplete and
    return the resulting list of event trees (one per point of introduction).
    """
    # NOTE: helpers (e.g. introduce_for_node) read the module-global PARAMS,
    # so it must be set before any of them run — keep this first.
    global PARAMS
    PARAMS = parameters
    forest = []
    points_of_intro = get_points_of_intro(
        Gcomplete, get_no_of_points_of_intro(Gcomplete.number_of_nodes()))
    total = len(points_of_intro)
    for idx, point in enumerate(points_of_intro, start=1):
        forest.append(introduce_for_node(Gcomplete, point, content_level))
        display(
            "introduce_content",
            "Current POI count: " + str(idx) + " out of " + str(total))
    return forest
def boost_view_share_probabilities(Gcomplete, Gbase, view_boost, share_boost):
    """Multiply the 'Pview'/'Pshare' probabilities of every Gcomplete edge
    that also exists in Gbase by (1 + view_boost)/(1 + share_boost), in both
    directions, then clamp all probabilities back into [0, 1].

    Returns the number of Gcomplete edges that were matched in Gbase.
    """
    count = 0
    for u, v in Gcomplete.edges():
        if not Gbase.has_edge(u, v):
            continue
        count += 1
        # Boost both orientations of the matched edge.
        for src, dst in ((u, v), (v, u)):
            Gcomplete.node[src]['Pview'][dst] *= (1.0 + view_boost)
            Gcomplete.node[src]['Pshare'][dst] *= (1.0 + share_boost)
    display("boost_view_share_probabilities",
            "Provided view and share probability boosts to existing edges.")
    sanitize_prob(Gcomplete)
    display("boost_view_share_probabilities",
            "Sanitized probability values > 1 and < 0.")
    return count
def compare_graphs(PARAMS):
    """Generate the base/complete graphs, infer a graph from the complete
    one, and compare Gbase against the inferred graph.

    Returns a dict with edge counts and ratios: total_edges, true_positive,
    true_positive_ratio, false_positive, false_positive_ratio, missed_edges,
    missed_edges_ratio.
    """
    Gbase, Gcomplete = GG.generate_graphs(PARAMS)
    Ginferred = IG.infer_graph(Gcomplete, PARAMS)
    display("compare_graphs", "Obtained inferred graph for this run.")
    total, tp, fp, missed = get_comparison_stats(Gbase, Ginferred)
    # `* 1.0` forces float division (keeps Python 2 semantics intact).
    denom = total * 1.0
    comparison_stats = {
        'total_edges': total,
        'true_positive': tp,
        'true_positive_ratio': (tp * 1.0) / denom,
        'false_positive': fp,
        'false_positive_ratio': (fp * 1.0) / denom,
        'missed_edges': missed,
        'missed_edges_ratio': (missed * 1.0) / denom,
    }
    display("compare_graphs", "Returning comparison statistics.")
    return comparison_stats
def infer_graph(Gcomplete, PARAMS):
    """Infer an undirected graph from Gcomplete by running
    PARAMS['run_count'] rounds of content introduction, combining the event
    forests into weighted edges, and keeping edges above a weight threshold.
    """
    all_forests = []
    for _run in range(PARAMS['run_count']):
        all_forests.extend(IAC.introduce_all_content(Gcomplete, PARAMS))
    weighted_edges = combine_event_forests(all_forests)
    display("infer_graph", "Obtained weighted edges from forest.")
    weight_threshold = get_weight_threshold(weighted_edges, PARAMS)
    display("infer_graph", "Obtain weight threshold.")
    filtered_edges = impose_weight_restriction(weighted_edges,
                                               weight_threshold)
    display("infer_graph", "Imposed weight restriction.")
    # NOTE(review): the weighted directed graph is built but never used
    # afterwards; kept for behavior parity with the original.
    Ginferred_weighted_directed = get_weighted_graph(filtered_edges)
    display("infer_graph", "Obtained weighted directed graph.")
    Ginferred_directed = get_directed_graph(filtered_edges)
    display("infer_graph", "Obtained directed graph.")
    Ginferred = Ginferred_directed.to_undirected()
    display("infer_graph", "Obtained final inferred graph.")
    return Ginferred
def draw_graph(graph, title):
    """Render `graph` with graphviz's neato layout and save it as
    <path_prefix>/<title>.ps."""
    # networkx 1.x API (moved to nx.nx_agraph.to_agraph in networkx 2.x).
    agraph = nx.to_agraph(graph)
    agraph.layout(prog="neato")
    output_path = os.path.join(path_prefix, title + ".ps")
    agraph.draw(output_path)
    display("draw_graph", "Drawn " + title + " and saved to " + title + ".ps")
def generate_graphs(PARAMS):
    """Build and return (Gbase, Gcomplete).

    Gbase: the sampled real-world graph — nodes are sampled from the full
    dataset (get_filtered_nodes), the edge list is filtered and remapped to
    compact ids (filter_edgelist), then re-read from the sampled dataset.
    Gcomplete: a complete graph on the same number of nodes, with view/share
    probabilities assigned from N(mean, sd) and boosted on edges that also
    exist in Gbase.
    """
    # NOTE: the original also read PARAMS['sample_size'] into an unused
    # local (filter_count); removed — get_filtered_nodes reads it itself.
    mean = PARAMS['vwshprob_mean']
    sd = PARAMS['vwshprob_stdv']
    view_boost = PARAMS['view_boost']
    share_boost = PARAMS['share_boost']
    Goriginal = nx.read_edgelist(os.path.join(path_prefix, PARAMS['dataset']),
                                 nodetype=int)
    display("generate_graphs", "Prepared Goriginal from entire dataset.")
    filtered_nodelist = get_filtered_nodes(Goriginal, PARAMS)
    display("generate_graphs",
            "Received filtered node list from get_filtered_nodes().")
    filter_edgelist(PARAMS, filtered_nodelist)
    display("generate_graphs", "Filtered edge list recorded in sampled dataset.")
    Gbase = nx.read_edgelist(
        os.path.join(path_prefix, PARAMS['sampled_dataset']), nodetype=int)
    display("generate_graphs", "Prepared Gbase from sampled dataset.")
    # Complete graph over the same node count; node ids are 0..n-1, matching
    # the compact remapping done by filter_edgelist.
    Gcomplete = nx.complete_graph(nx.number_of_nodes(Gbase))
    display("generate_graphs", "Prepared Gcomplete from sampled dataset.")
    assign_probabilities(Gcomplete, mean, sd)
    display("generate_graphs", "Assigned probabilities to all nodes.")
    edges_matched = boost_view_share_probabilities(Gcomplete, Gbase,
                                                   view_boost, share_boost)
    display(
        "generate_graphs",
        "Boosted view and share probabilities, number of matched edges is " +
        str(edges_matched))
    return Gbase, Gcomplete