def filter_edgelist(PARAMS, node_list):

    ifp = open(os.path.join(path_prefix, PARAMS['dataset']), "r")
    ofp = open(os.path.join(path_prefix, PARAMS['sampled_dataset']), "w")
    edges = ifp.readlines()
    ifp.close()

    # Map each sampled node id to a contiguous index starting at 0.
    mapping = {node: idx for idx, node in enumerate(node_list)}
    display("filter_edgelist",
            "Created mapping for " + str(len(mapping)) + " nodes.")

    out_lines = []

    cedge = 0
    cmatch = 0
    for edge in edges:
        cedge += 1
        tokens = edge.rstrip().split(" ")
        src, dst = int(tokens[0]), int(tokens[1])
        # Keep only edges whose endpoints both survived the node filtering,
        # rewritten in terms of the new contiguous node indices.
        if src in mapping and dst in mapping:
            out_lines.append(str(mapping[src]) + " " + str(mapping[dst]) + "\n")
            cmatch += 1

    ofp.write("".join(out_lines))
    ofp.close()
    display(
        "filter_edgelist",
        "Scanned " + str(cedge) + " edges; " + str(cmatch) +
        " matched the filtered node list.")

    return
def sanitize_prob(Gcomplete):

    # Clamp every per-neighbour view/share probability into [0.0, 1.0],
    # in both directions of each edge.
    for edge in Gcomplete.edges():
        for u, v in (edge, (edge[1], edge[0])):
            for key in ('Pview', 'Pshare'):
                prob = Gcomplete.node[u][key][v]
                Gcomplete.node[u][key][v] = min(1.0, max(0.0, prob))

    display("sanitize_prob", "Sanitized Gcomplete.")

    return
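# Data-structure sketch (illustrative only, not part of the original snippets):
# sanitize_prob() and boost_view_share_probabilities() assume every node stores
# per-neighbour probability dicts under 'Pview' and 'Pshare'. The toy graph and
# values below are hypothetical; in the real pipeline assign_probabilities()
# (not shown here) populates them. Assumes networkx is imported as nx, as in
# the surrounding snippets.
def _toy_probability_graph():
    G = nx.complete_graph(3)
    for u in G.nodes():
        # One probability per neighbour, keyed by the opposite endpoint.
        G.node[u]['Pview'] = {v: 0.5 for v in G.neighbors(u)}
        G.node[u]['Pshare'] = {v: 0.25 for v in G.neighbors(u)}
    return G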
def get_filtered_nodes(Goriginal, PARAMS):

    size = PARAMS['sample_size']
    sample_using = PARAMS['sampling_technique']
    node_list = []

    degrees = Goriginal.degree(list(Goriginal.node))
    node_deg_asc = sorted(degrees.items(), key=itg(1))
    node_deg_desc = sorted(degrees.items(), key=itg(1), reverse=True)
    deg_asc = [i[1] for i in node_deg_asc]
    node_desc = [i[0] for i in node_deg_desc]

    if sample_using == "DegreeMin":
        median = int(math.ceil(np.median(deg_asc)))
        start_pos = 0
        for i in node_deg_asc:
            if i[1] == median:
                start_pos = i[0]
                break
        node_asc = [i[0] for i in node_deg_asc]
        node_list = node_asc[start_pos:][:size]

    elif sample_using == "DegreeMax":
        node_list = node_desc[:size]

    display(
        "get_filtered_nodes", "Sampling done using " + sample_using +
        "; node list size is " + str(len(node_list)) + ".")

    return node_list
def load_parameters_for_test_run():

    PARAMS, ALLVAR, STATVAR = load_all_properties()
    PARAMS = load_default_properties(PARAMS)
    display("load_parameters_for_test_run", "Parameters loaded for test run.")

    return PARAMS
def get_points_of_intro(Gcomplete, no_of_points):

    display("get_points_of_intro",
            "No. of points of introduction: " + str(no_of_points))
    points_of_intro = []

    for i in range(no_of_points):
        points_of_intro.append(
            get_next_point_of_intro(Gcomplete.number_of_nodes()))
    # display("get_points_of_intro", "Points of introduction: "+str(points_of_intro))

    return points_of_intro
def introduce_all_content(Gcomplete, PARAMS):

    EF = []
    content_count = PARAMS['content_count']
    content_levels = PARAMS['content_levels']

    for i in range(content_count):
        content_level = get_content_level(content_levels)
        EFlocal = IC.introduce_content(Gcomplete, content_level, PARAMS)
        EF.extend(EFlocal)
        display(
            "introduce_all_content",
            "Current content: " + str(i + 1) + " out of " + str(content_count))

    return EF
def introduce_content(Gcomplete, content_level, parameters):

    EFlocal = []
    global PARAMS
    PARAMS = parameters
    points_of_intro = get_points_of_intro(
        Gcomplete, get_no_of_points_of_intro(Gcomplete.number_of_nodes()))

    count = 0
    for point in points_of_intro:
        ET = introduce_for_node(Gcomplete, point, content_level)
        EFlocal.append(ET)
        count += 1
        display(
            "introduce_content", "Current POI count: " + str(count) +
            " out of " + str(len(points_of_intro)))

    return EFlocal
def boost_view_share_probabilities(Gcomplete, Gbase, view_boost, share_boost):

    count = 0

    for edge in Gcomplete.edges():

        if Gbase.has_edge(*edge):

            count += 1
            Gcomplete.node[edge[0]]['Pview'][edge[1]] *= (1.0 + view_boost)
            Gcomplete.node[edge[0]]['Pshare'][edge[1]] *= (1.0 + share_boost)
            Gcomplete.node[edge[1]]['Pview'][edge[0]] *= (1.0 + view_boost)
            Gcomplete.node[edge[1]]['Pshare'][edge[0]] *= (1.0 + share_boost)

    display("boost_view_share_probabilities",
            "Provided view and share probability boosts to existing edges.")
    sanitize_prob(Gcomplete)
    display("boost_view_share_probabilities",
            "Sanitized probability values > 1 and < 0.")

    return count
def compare_graphs(PARAMS):

    Gbase, Gcomplete = GG.generate_graphs(PARAMS)
    Ginferred = IG.infer_graph(Gcomplete, PARAMS)
    display("compare_graphs", "Obtained inferred graph for this run.")

    comparison_stats = {}
    total_no_of_edges, true_positive, false_positive, missed_edges = get_comparison_stats(
        Gbase, Ginferred)
    comparison_stats['total_edges'] = total_no_of_edges
    comparison_stats['true_positive'] = true_positive
    comparison_stats['true_positive_ratio'] = (true_positive *
                                               1.0) / (total_no_of_edges * 1.0)
    comparison_stats['false_positive'] = false_positive
    comparison_stats['false_positive_ratio'] = (false_positive * 1.0) / (
        total_no_of_edges * 1.0)
    comparison_stats['missed_edges'] = missed_edges
    comparison_stats['missed_edges_ratio'] = (missed_edges *
                                              1.0) / (total_no_of_edges * 1.0)
    display("compare_graphs", "Returning comparison statistics.")

    return comparison_stats
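# Minimal driver sketch, illustrative only: it assumes load_parameters_for_test_run()
# and compare_graphs() are importable from the same module and that the returned
# PARAMS dict carries every key the pipeline expects. The function name is
# hypothetical, not part of the original snippets.
def run_comparison_sketch():
    PARAMS = load_parameters_for_test_run()
    stats = compare_graphs(PARAMS)
    for key in ('total_edges', 'true_positive_ratio', 'false_positive_ratio',
                'missed_edges_ratio'):
        display("run_comparison_sketch", key + ": " + str(stats[key]))
    return stats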
def infer_graph(Gcomplete, PARAMS):

    EFAll = []
    run_count = PARAMS['run_count']

    for i in range(run_count):
        EF = IAC.introduce_all_content(Gcomplete, PARAMS)
        EFAll.extend(EF)

    weighted_edges = combine_event_forests(EFAll)
    display("infer_graph", "Obtained weighted edges from forest.")
    weight_threshold = get_weight_threshold(weighted_edges, PARAMS)
    display("infer_graph", "Obtain weight threshold.")
    weighted_edges_filtered = impose_weight_restriction(
        weighted_edges, weight_threshold)
    display("infer_graph", "Imposed weight restriction.")

    Ginferred_weighted_directed = get_weighted_graph(weighted_edges_filtered)
    display("infer_graph", "Obtained weighted directed graph.")
    Ginferred_directed = get_directed_graph(weighted_edges_filtered)
    display("infer_graph", "Obtained directed graph.")
    Ginferred = Ginferred_directed.to_undirected()
    display("infer_graph", "Obtained final inferred graph.")

    return Ginferred
def draw_graph(graph, title):
    # Requires pygraphviz; nx.to_agraph is the NetworkX 1.x entry point
    # (nx.nx_agraph.to_agraph in later releases).
    graph_gvz = nx.to_agraph(graph)
    graph_gvz.layout(prog="neato")
    graph_gvz.draw(os.path.join(path_prefix, title + ".ps"))
    display("draw_graph", "Drew " + title + " and saved it to " + title + ".ps")
def generate_graphs(PARAMS):

    filter_count = PARAMS['sample_size']
    mean = PARAMS['vwshprob_mean']
    sd = PARAMS['vwshprob_stdv']
    view_boost = PARAMS['view_boost']
    share_boost = PARAMS['share_boost']

    Goriginal = nx.read_edgelist(os.path.join(path_prefix, PARAMS['dataset']),
                                 nodetype=int)
    display("generate_graphs", "Prepared Goriginal from entire dataset.")
    filtered_nodelist = get_filtered_nodes(Goriginal, PARAMS)
    display("generate_graphs",
            "Received filtered node list from get_filtered_nodes().")

    filter_edgelist(PARAMS, filtered_nodelist)
    display("generate_graphs",
            "Filtered edge list recorded in sampled dataset.")
    Gbase = nx.read_edgelist(os.path.join(path_prefix,
                                          PARAMS['sampled_dataset']),
                             nodetype=int)
    display("generate_graphs", "Prepared Gbase from sampled dataset.")

    Gcomplete = nx.complete_graph(nx.number_of_nodes(Gbase))
    display("generate_graphs", "Prepared Gcomplete from sampled dataset.")

    assign_probabilities(Gcomplete, mean, sd)
    display("generate_graphs", "Assigned probabilities to all nodes.")

    edges_matched = boost_view_share_probabilities(Gcomplete, Gbase,
                                                   view_boost, share_boost)
    display(
        "generate_graphs",
        "Boosted view and share probabilities, number of matched edges is " +
        str(edges_matched))

    return Gbase, Gcomplete
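# Illustrative PARAMS values only: the keys below are the ones generate_graphs(),
# get_filtered_nodes() and filter_edgelist() read in the snippets above; the
# file names and numbers are hypothetical placeholders, since the real values
# come from the properties files loaded by load_all_properties().
EXAMPLE_PARAMS = {
    'dataset': 'edges.txt',                # hypothetical input edge list
    'sampled_dataset': 'edges_sampled.txt',
    'sample_size': 200,
    'sampling_technique': 'DegreeMax',     # or "DegreeMin"
    'vwshprob_mean': 0.3,
    'vwshprob_stdv': 0.1,
    'view_boost': 0.5,
    'share_boost': 0.5,
}
# Gbase, Gcomplete = generate_graphs(EXAMPLE_PARAMS)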