def sensitive_3():
    """Record how each top-50 entry's weighted-HITS position shifts after
    ``remove_significant_edge`` is applied for that entry.

    Steps:
      1. Read the top-50 names (stripped, lower-cased) from
         ``../data/univ_top_50_cs.txt``.
      2. Build the graph from ``../data/data_top50_cs_apr09.csv`` with
         ``self_edge=False``, run ``algo.weighted_HITS`` and keep the top-50
         names in descending score order as the baseline ranking.
      3. For every baseline entry, rebuild the graph from the same CSV, pass
         it through ``remove_significant_edge`` for that entry, re-rank, and
         store ``baseline_index - new_index``.  A positive value means the
         entry sits closer to the top after the perturbation.
      4. Print the collected rows and write them as CSV (header
         ``univ,diff+mit1``) to the sensitivity result file.

    The output file is opened only after all rankings are computed, so a
    failure part-way through does not leave a truncated result file behind.

    Returns:
        None.
    """
    data_path = "../data/data_top50_cs_apr09.csv"
    out_path = ("../result/result_top50_cs_newdata_apr09/sensitivity/all/"
                "sensitivity_diff_hits_weighted-inedge1.csv")

    # A set gives constant-time membership tests in the ranking filters below.
    with open("../data/univ_top_50_cs.txt", "r") as f:
        top_50 = set(line.strip().lower() for line in f)

    def ranked_top50(graph):
        # Top-50 names ordered by descending weighted-HITS score.
        hits = algo.weighted_HITS(graph, max_iterations=100, min_delta=0.00001)
        ordered = sorted(hits.iteritems(),
                         key=lambda item: item[1],
                         reverse=True)
        return [item[0] for item in ordered if item[0] in top_50]

    # Baseline ranking on the unmodified graph.
    node_list, edge_list = dp.read_data(data_path, self_edge=False)
    G = dp.construct_graph(node_list, edge_list)
    rank = ranked_top50(G)
    G.clear()

    # One row per baseline entry: [name, position_shift].
    original_r = [[univ] for univ in rank]

    for k, row in enumerate(original_r):
        univ = row[0]
        # The graph is rebuilt from disk each time so every perturbation
        # starts from the same unmodified data.
        node_list, edge_list = dp.read_data(data_path, self_edge=False)
        G = dp.construct_graph(node_list, edge_list)
        # NOTE(review): the previous inline comment said "add one edge from
        # MIT to <node>", but the helper called here is
        # remove_significant_edge -- confirm which perturbation is intended.
        G = remove_significant_edge(G, univ, rank=rank)
        perturbed = ranked_top50(G)
        G.clear()

        # Position of this entry in the perturbed ranking.  Falls back to 0
        # when the entry is missing from that ranking.
        kr = 0
        for pos, name in enumerate(perturbed):
            if name == univ:
                kr = pos
        row.append(k - kr)

    print(original_r)

    with open(out_path, "w") as fo:
        fo.write("univ,diff+mit1\n")
        for row in original_r:
            fo.write(",".join(str(value) for value in row) + "\n")
def sensitive_3():
    """Record how each top-50 entry's weighted-HITS position shifts after
    ``remove_significant_edge`` is applied for that entry.

    Steps:
      1. Read the top-50 names (stripped, lower-cased) from
         ``../data/univ_top_50_cs.txt``.
      2. Build the graph from ``../data/data_top50_cs_apr09.csv`` with
         ``self_edge=False``, run ``algo.weighted_HITS`` and keep the top-50
         names in descending score order as the baseline ranking.
      3. For every baseline entry, rebuild the graph from the same CSV, pass
         it through ``remove_significant_edge`` for that entry, re-rank, and
         store ``baseline_index - new_index``.  A positive value means the
         entry sits closer to the top after the perturbation.
      4. Print the collected rows and write them as CSV (header
         ``univ,diff+mit1``) to the sensitivity result file.

    The output file is opened only after all rankings are computed, so a
    failure part-way through does not leave a truncated result file behind.

    Returns:
        None.
    """
    data_path = "../data/data_top50_cs_apr09.csv"
    out_path = ("../result/result_top50_cs_newdata_apr09/sensitivity/all/"
                "sensitivity_diff_hits_weighted-inedge1.csv")

    # A set gives constant-time membership tests in the ranking filters below.
    with open("../data/univ_top_50_cs.txt", "r") as f:
        top_50 = set(line.strip().lower() for line in f)

    def ranked_top50(graph):
        # Top-50 names ordered by descending weighted-HITS score.
        hits = algo.weighted_HITS(graph, max_iterations=100, min_delta=0.00001)
        ordered = sorted(hits.iteritems(),
                         key=lambda item: item[1],
                         reverse=True)
        return [item[0] for item in ordered if item[0] in top_50]

    # Baseline ranking on the unmodified graph.
    node_list, edge_list = dp.read_data(data_path, self_edge=False)
    G = dp.construct_graph(node_list, edge_list)
    rank = ranked_top50(G)
    G.clear()

    # One row per baseline entry: [name, position_shift].
    original_r = [[univ] for univ in rank]

    for k, row in enumerate(original_r):
        univ = row[0]
        # The graph is rebuilt from disk each time so every perturbation
        # starts from the same unmodified data.
        node_list, edge_list = dp.read_data(data_path, self_edge=False)
        G = dp.construct_graph(node_list, edge_list)
        # NOTE(review): the previous inline comment said "add one edge from
        # MIT to <node>", but the helper called here is
        # remove_significant_edge -- confirm which perturbation is intended.
        G = remove_significant_edge(G, univ, rank=rank)
        perturbed = ranked_top50(G)
        G.clear()

        # Position of this entry in the perturbed ranking.  Falls back to 0
        # when the entry is missing from that ranking.
        kr = 0
        for pos, name in enumerate(perturbed):
            if name == univ:
                kr = pos
        row.append(k - kr)

    print(original_r)

    with open(out_path, "w") as fo:
        fo.write("univ,diff+mit1\n")
        for row in original_r:
            fo.write(",".join(str(value) for value in row) + "\n")
def choose_algorithm(G, type="hits_weighted"):
    """Run the ranking routine selected by ``type`` on ``G`` and return its result.

    Args:
        G: graph object, forwarded unchanged to the selected routine.
        type: one of ``"hits_weighted"``, ``"hits_hubavg"``,
            ``"weightedPR_w_norm"``, ``"weightedPR_wo_norm"`` or
            ``"indegree"``.

    Returns:
        Whatever the selected routine returns.  For an unrecognised ``type``
        the message ``error type!`` is printed and an empty list is returned.
    """
    # Stopping criteria shared by all iterative routines.
    convergence = {"max_iterations": 100, "min_delta": 0.00001}

    if type == "hits_weighted":
        return algo.weighted_HITS(G, **convergence)
    if type == "hits_hubavg":
        return algo.hubavg_HITS(G, **convergence)
    if type == "weightedPR_w_norm":
        return algo.weighted_PR_wnorm(G, damping_factor=0.85, **convergence)
    if type == "weightedPR_wo_norm":
        return algo.weighted_PR_wonorm(G, damping_factor=0.85, **convergence)
    if type == "indegree":
        return dp.rank_univ_indegree(G)

    # No routine matched the requested name.
    print("error type!")
    return []
def choose_algorithm(G, type = "hits_weighted"):
    """Dispatch to the ranking routine named by ``type`` and return its output.

    Args:
        G: graph object handed to the chosen routine as-is.
        type: routine name -- ``"hits_weighted"``, ``"hits_hubavg"``,
            ``"weightedPR_w_norm"``, ``"weightedPR_wo_norm"`` or
            ``"indegree"``.

    Returns:
        The chosen routine's return value.  When ``type`` matches none of the
        names, ``error type!`` is printed and ``[]`` is returned.
    """
    # (name, thunk) pairs, checked in order.  The thunks defer the actual
    # call so only the matching routine is ever evaluated.
    runners = (
        ("hits_weighted",
         lambda: algo.weighted_HITS(G, max_iterations=100, min_delta=0.00001)),
        ("hits_hubavg",
         lambda: algo.hubavg_HITS(G, max_iterations=100, min_delta=0.00001)),
        ("weightedPR_w_norm",
         lambda: algo.weighted_PR_wnorm(G, damping_factor=0.85,
                                        max_iterations=100, min_delta=0.00001)),
        ("weightedPR_wo_norm",
         lambda: algo.weighted_PR_wonorm(G, damping_factor=0.85,
                                         max_iterations=100, min_delta=0.00001)),
        ("indegree",
         lambda: dp.rank_univ_indegree(G)),
    )

    for label, run in runners:
        if type == label:
            return run()

    # Unknown routine name.
    print("error type!")
    return []
# Example no. 5
# 0
def main():
    
#     bucket = {}
#     f = open("../result/result_top50_cs_newdata_apr09/year_statistical_from1995_to2015.csv","r")
#     f.readline()
#     for line in f:
#         lines = line.split(",")
#         try:
#             bucket.update({lines[0].strip() : int(lines[2].strip())})
#         except:
#             pass
#     f.close()
#     
#     node_list, edge_list = dp.read_data_in_range("../data/data_may28_new/data_top50_ee.csv", 
#                                                  "../data/data_may28_new/top50_ee_2015.txt",
#                                                  start_year = 1992, end_year = 2013, self_edge = True)
    
    node_list, edge_list = dp.read_data("../data/data_may28_new/data_top50_ee.csv", 
                                        "../data/data_may28_new/top50_ee_2015.txt", 
                                        self_edge = False, extended = False)
    print len(node_list), node_list
    print len(edge_list), edge_list
    
    exit(0)
    
    G = dp.construct_graph(node_list, edge_list)
    
    top_50 = []
    f = open("../data/data_may28_new/top50_ee_2015.txt","r")
    for line in f:
        line = line.strip().lower()
        top_50.append(line)
    f.close()
    
    print len(G.edges())
    print len(G.nodes())

    nodes = dp.rank_univ(G, t = "in_degree")
    f = open("../result/result_may28/ee/comparison/ee_1951-1991_indegree.csv","w")
    for node in nodes:
        if node[0] in top_50:
            f.write("%s;%d\n" %(node[0], node[1]))
    f.close()

    weighted_pagerank = algo.weighted_PR_wnorm(G, damping_factor = 0.85, max_iterations = 100, min_delta = 0.00001)
    result = sorted(weighted_pagerank.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_weightedPR_w_norm.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()
    
    weighted_pagerank = algo.weighted_PR_wonorm(G, damping_factor = 0.85, max_iterations = 100, min_delta = 0.00001)
    s = sum(weighted_pagerank.values())
    for rank in weighted_pagerank:
        weighted_pagerank[rank] = weighted_pagerank[rank]*50.0/s
    result = sorted(weighted_pagerank.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_weightedPR_wo_norm.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()
#    
#     hits = algo.HITS(G, max_iterations = 100, min_delta = 0.00001)
#     result = sorted(hits.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_may28/me/extendedGwselfedges/cs_hits.csv","w")
#     for r in result:
#         if r[0] in top_50:
#             f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()
       
    hits = algo.weighted_HITS(G, max_iterations = 100, min_delta = 0.00001)
    result = sorted(hits.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_hits_weighted.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()
    
    hubavg = algo.hubavg_HITS(G, max_iterations = 100, min_delta = 0.00001)
    result = sorted(hubavg.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_hits_hubavg.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()

#     salsa = algo.SALSA(G)
#     result = sorted(salsa.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_top50_cs_newdata_apr09/result_top50_cs/univ_top50_cs_from2000_salsa.csv","w")
#     for r in result:
#         f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()
#       
#     salsa = algo.modified_SALSA(G)
#     result = sorted(salsa.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_top50_cs_extended/entire/univ_top40_me_from1946_to1990_salsa_modified.csv","w")
#     for r in result:
#         if r[0] in top_50:
#             f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()
#  
#     credit = algo.CreditPropagation(G, original_rank = hits, cr = 0.8, max_iterations = 10000, min_delta = 0.00001)
#     result = sorted(credit.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_top50_cs_newdata_apr09/result_top50_cs_subtracted_woselfedge/univ_top50_cs_wo_selfedges_CreditProp_hits.csv","w")
#     for r in result:
#         if r[0] in top_50:
#             f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()


    """ new experiments on authavg and weightedHITS_normalized @ May 13th """