def sensitive():
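    """Sensitivity check for weighted PageRank (without normalization) on the
    1995-2015 graph: write the baseline top-50 ordering, then, for every
    top-50 school except MIT, add a single MIT -> <school> edge and write the
    resulting ordering as one CSV row."""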
    top_50 = []
    f = open("../data/univ_top_50_cs.txt", "r")
    for line in f:
        line = line.strip().lower()
        top_50.append(line)
    f.close()

    fo = open(
        "../result/result_top50_cs_newdata_apr09/sensitivity/sensitivity_weightedPR_wo_norm_1995-2015+mit1.csv",
        "w")
    node_list, edge_list = dp.read_data_in_range(
        "../data/data_top50_cs_apr09.csv",
        start_year=1995,
        end_year=2015,
        self_edge=False)
    G = dp.construct_graph(node_list, edge_list)
    hits = algo.weighted_PR_wonorm(G,
                                   damping_factor=0.85,
                                   max_iterations=100,
                                   min_delta=0.00001)
    result = sorted(hits.iteritems(), key=lambda asd: asd[1], reverse=True)
    G.clear()
    original_r = []
    for e in result:
        if e[0] in top_50:
            original_r.append(e[0])
    fo.write("origin,")
    for node in original_r:
        fo.write("%s," % node)
    fo.write("\n")
    for node in top_50:
        if not node == "mit":
            node_list, edge_list = dp.read_data_in_range(
                "../data/data_top50_cs_apr09.csv",
                start_year=1995,
                end_year=2015,
                self_edge=False)
            G = dp.construct_graph(node_list, edge_list)
            G = add_non_existing_edges(
                G, node, "mit", weight=1)  ### add one edge from MIT to <node>
            hits = algo.weighted_PR_wonorm(G,
                                           damping_factor=0.85,
                                           max_iterations=100,
                                           min_delta=0.00001)
            result = sorted(hits.iteritems(),
                            key=lambda asd: asd[1],
                            reverse=True)
            G.clear()
            res1 = []
            for e in result:
                if e[0] in top_50:
                    res1.append(e[0])
            fo.write("%s," % node)
            for r in res1:
                fo.write("%s," % r)
            fo.write("\n")
    fo.close()
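

# `add_non_existing_edges` is called throughout this listing but is not part
# of it. A minimal reconstruction for context, offered as an assumption and
# not as the author's original. The argument order is inferred from the calls
# above (add_non_existing_edges(G, node, "mit") adds one edge from "mit" to
# node), assuming a networkx DiGraph with a 'weight' edge attribute.
def add_non_existing_edges(G, to_node, from_node, weight=1):
    if not G.has_edge(from_node, to_node):
        G.add_edge(from_node, to_node, weight=weight)
    return G
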
def sensitive_3():
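    """Sensitivity check for weighted HITS: for every top-50 school, remove a
    significant incoming edge (via remove_significant_edge) and record how
    many positions the school moves relative to the baseline ranking."""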
    top_50 = []
    f = open("../data/univ_top_50_cs.txt", "r")
    for line in f:
        line = line.strip().lower()
        top_50.append(line)
    f.close()

    fo = open(
        "../result/result_top50_cs_newdata_apr09/sensitivity/all/sensitivity_diff_hits_weighted-inedge1.csv",
        "w")
    node_list, edge_list = dp.read_data("../data/data_top50_cs_apr09.csv",
                                        self_edge=False)
    G = dp.construct_graph(node_list, edge_list)
    hits = algo.weighted_HITS(G, max_iterations=100, min_delta=0.00001)
    result = sorted(hits.iteritems(), key=lambda asd: asd[1], reverse=True)
    G.clear()

    rank = []
    for e in result:
        if e[0] in top_50:
            rank.append(e[0])

    original_r = []
    for e in result:
        if e[0] in top_50:
            original_r.append([e[0]])

    for k in range(len(original_r)):
        #         if not original_r[k][0] == "mit":
        node_list, edge_list = dp.read_data("../data/data_top50_cs_apr09.csv",
                                            self_edge=False)
        G = dp.construct_graph(node_list, edge_list)
        G = remove_significant_edge(
            G, original_r[k][0],
            rank=rank)  # remove a significant incoming edge for <node>
        hits = algo.weighted_HITS(G, max_iterations=100, min_delta=0.00001)
        result = sorted(hits.iteritems(), key=lambda asd: asd[1], reverse=True)
        G.clear()
        res1 = []
        for e in result:
            if e[0] in top_50:
                res1.append(e[0])
        kr = 0
        for i in range(len(res1)):
            if res1[i] == original_r[k][0]:
                kr = i
        original_r[k].append(k - kr)
    print original_r
    fo.write("univ,diff+mit1\n")
    for r in original_r:
        for i in range(len(r)):
            if i == 0:
                fo.write(str(r[i]))
            else:
                fo.write("," + str(r[i]))
        fo.write("\n")
    fo.close()
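

# `remove_significant_edge` is likewise not included in this listing. One
# plausible reading, offered purely as a hedged sketch (the original may use
# the `rank` argument differently): drop the heaviest incoming edge of
# `node`, assuming a networkx DiGraph with 'weight' edge attributes.
def remove_significant_edge(G, node, rank=None):
    in_edges = list(G.in_edges(node, data=True))
    if in_edges:
        u, v, _ = max(((a, b, d.get("weight", 1)) for a, b, d in in_edges),
                      key=lambda e: e[2])
        G.remove_edge(u, v)
    return G
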
def sensitive_2():
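    """Sensitivity check for CreditPropagation seeded with HITS on the
    1995-2015 graph: for every top-50 school except MIT, add a single
    MIT -> <school> edge and record how many positions the school moves
    relative to the baseline ranking."""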
    top_50 = []
    f = open("../data/univ_top_50_cs.txt", "r")
    for line in f:
        line = line.strip().lower()
        top_50.append(line)
    f.close()

    fo = open(
        "../result/result_top50_cs_newdata_apr09/sensitivity/mit+1/sensitivity_diff_CreditProp_hits_1995-2015+mit1.csv",
        "w")
    node_list, edge_list = dp.read_data_in_range(
        "../data/data_top50_cs_apr09.csv",
        start_year=1995,
        end_year=2015,
        self_edge=False)
    G = dp.construct_graph(node_list, edge_list)
    hits = algo.HITS(G, max_iterations=100, min_delta=0.00001)
    hits = algo.CreditPropagation(G,
                                  original_rank=hits,
                                  cr=0.85,
                                  max_iterations=100,
                                  min_delta=0.00001)
    result = sorted(hits.iteritems(), key=lambda asd: asd[1], reverse=True)
    G.clear()
    original_r = []
    for e in result:
        if e[0] in top_50:
            original_r.append([e[0]])

    for k in range(len(original_r)):
        if not original_r[k][0] == "mit":
            node_list, edge_list = dp.read_data_in_range(
                "../data/data_top50_cs_apr09.csv",
                start_year=1995,
                end_year=2015,
                self_edge=False)
            G = dp.construct_graph(node_list, edge_list)
            G = add_non_existing_edges(
                G, original_r[k][0], "mit",
                weight=1)  ### add one edge from MIT to <node>
            hits = algo.HITS(G, max_iterations=100, min_delta=0.00001)
            hits = algo.CreditPropagation(G,
                                          original_rank=hits,
                                          cr=0.85,
                                          max_iterations=100,
                                          min_delta=0.00001)
            result = sorted(hits.iteritems(),
                            key=lambda asd: asd[1],
                            reverse=True)
            G.clear()
            res1 = []
            for e in result:
                if e[0] in top_50:
                    res1.append(e[0])
            kr = 0
            for i in range(len(res1)):
                if res1[i] == original_r[k][0]:
                    kr = i
            original_r[k].append(k - kr)
    print original_r
    fo.write("univ,diff+mit1\n")
    for r in original_r:
        for i in range(len(r)):
            if i == 0:
                fo.write(str(r[i]))
            else:
                fo.write("," + str(r[i]))
        fo.write("\n")
    fo.close()
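

# `choose_algorithm` (used by sensitive_add_edge below) is also not part of
# this listing. A minimal dispatch sketch: only the "hits_weighted" string is
# attested above; the other type strings are assumptions.
def choose_algorithm(G, type="hits_weighted"):
    if type == "hits_weighted":
        return algo.weighted_HITS(G, max_iterations=100, min_delta=0.00001)
    if type == "hits":
        return algo.HITS(G, max_iterations=100, min_delta=0.00001)
    if type == "weighted_pr_wonorm":
        return algo.weighted_PR_wonorm(G,
                                       damping_factor=0.85,
                                       max_iterations=100,
                                       min_delta=0.00001)
    raise ValueError("unknown algorithm type: %s" % type)
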
def sensitive_add_edge(filename1,
                       filename2,
                       outputfilename,
                       type="hits_weighted",
                       add_node="mit"):
    top_50 = []
    f = open(filename2, "r")
    for line in f:
        line = line.strip().lower()
        top_50.append(line)
    f.close()

    fo = open(outputfilename, "w")
    node_list, edge_list = dp.read_data(filename1,
                                        filename2,
                                        self_edge=False,
                                        extended=True)
    G = dp.construct_graph(node_list, edge_list)
    #r = algo.weighted_HITS(G, max_iterations = 100, min_delta = 0.00001)
    r = choose_algorithm(G, type=type)
    result = sorted(r.iteritems(), key=lambda asd: asd[1], reverse=True)
    G.clear()

    rank = []
    for e in result:
        if e[0] in top_50:
            rank.append(e[0])

    original_r = []
    for e in result:
        if e[0] in top_50:
            original_r.append([e[0]])

    for k in range(len(original_r)):
        #         if not original_r[k][0] == "mit":
        node_list, edge_list = dp.read_data(filename1,
                                            filename2,
                                            self_edge=False,
                                            extended=True)
        G = dp.construct_graph(node_list, edge_list)
        G = add_non_existing_edges(
            G, original_r[k][0], add_node,
            weight=1)  # add one edge from <add_node> to <node>
        #r = algo.weighted_HITS(G, max_iterations = 100, min_delta = 0.00001)
        r = choose_algorithm(G, type=type)
        result = sorted(r.iteritems(), key=lambda asd: asd[1], reverse=True)
        G.clear()
        res1 = []
        for e in result:
            if e[0] in top_50:
                res1.append(e[0])
        kr = 0
        for i in range(len(res1)):
            if res1[i] == original_r[k][0]:
                kr = i
        original_r[k].append(k - kr)
    print original_r
    fo.write("univ,diff+%s1\n" % (add_node))
    for r in original_r:
        for i in range(len(r)):
            if i == 0:
                fo.write(str(r[i]))
            else:
                fo.write("," + str(r[i]))
        fo.write("\n")
    fo.close()
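
# Example invocation (the output path is hypothetical; adjust to your own
# layout):
#
#   sensitive_add_edge("../data/data_top50_cs_apr09.csv",
#                      "../data/univ_top_50_cs.txt",
#                      "../result/sensitivity_hits_weighted+mit1.csv",
#                      type="hits_weighted", add_node="mit")
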
Example #9
"""
@author: Bolun
"""
import data_processing as dp
import algorithms as algo
import networkx as nx
import ranking_evaluation as reval

list1 = []
f = open("../data/univ_top_50_cs.csv", "r")
for line in f:
    list1.append(line.strip())
f.close()

node_list, edge_list = dp.read_data("../data/data_top50_cs.csv")
G = dp.construct_graph(node_list, edge_list)

# orank = algo.weighted_PR_wonorm(G, damping_factor = 0.85, max_iterations = 100, min_delta = 0.00001)
# s = sum(orank.values())
# for rank in orank:
#     orank[rank] = orank[rank]*50.0/s
# result = sorted(orank.iteritems(), key = lambda asd:asd[1], reverse = True)

orank = algo.HITS(G, max_iterations=100, min_delta=0.00001)
result = sorted(orank.iteritems(), key=lambda asd: asd[1], reverse=True)
print result

f = open("../result/result_top50_cs/CreditPropagation_hits_evaluation.csv", "w")
f.write("cr;dist\n")
i = 0.0
while i <= 1.0:
    # The listing is truncated here in the original; the body below is a
    # hedged reconstruction inferred from the "cr;dist" header above.
    # `reval.distance` is a hypothetical helper name, since the actual
    # ranking_evaluation API is not shown in this snippet.
    credit = algo.CreditPropagation(G, original_rank=orank, cr=i,
                                    max_iterations=100, min_delta=0.00001)
    result = sorted(credit.iteritems(), key=lambda asd: asd[1], reverse=True)
    rank = [r[0] for r in result if r[0] in list1]
    f.write("%.2f;%.5f\n" % (i, reval.distance(rank, list1)))
    i += 0.05
f.close()
Example #11
def main():
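    """Build the EE graph from the May 28 data and write comparison rankings
    (in-degree, weighted PageRank with and without normalization, weighted
    HITS, hubavg HITS) restricted to the 2015 top-50 EE list."""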
    
#     bucket = {}
#     f = open("../result/result_top50_cs_newdata_apr09/year_statistical_from1995_to2015.csv","r")
#     f.readline()
#     for line in f:
#         lines = line.split(",")
#         try:
#             bucket.update({lines[0].strip() : int(lines[2].strip())})
#         except:
#             pass
#     f.close()
#     
#     node_list, edge_list = dp.read_data_in_range("../data/data_may28_new/data_top50_ee.csv", 
#                                                  "../data/data_may28_new/top50_ee_2015.txt",
#                                                  start_year = 1992, end_year = 2013, self_edge = True)
    
    node_list, edge_list = dp.read_data("../data/data_may28_new/data_top50_ee.csv", 
                                        "../data/data_may28_new/top50_ee_2015.txt", 
                                        self_edge = False, extended = False)
    print len(node_list), node_list
    print len(edge_list), edge_list
    
    exit(0)  # early exit for debugging; everything below is currently unreachable
    
    G = dp.construct_graph(node_list, edge_list)
    
    top_50 = []
    f = open("../data/data_may28_new/top50_ee_2015.txt","r")
    for line in f:
        line = line.strip().lower()
        top_50.append(line)
    f.close()
    
    print len(G.edges())
    print len(G.nodes())

    nodes = dp.rank_univ(G, t = "in_degree")
    f = open("../result/result_may28/ee/comparison/ee_1951-1991_indegree.csv","w")
    for node in nodes:
        if node[0] in top_50:
            f.write("%s;%d\n" %(node[0], node[1]))
    f.close()

    weighted_pagerank = algo.weighted_PR_wnorm(G, damping_factor = 0.85, max_iterations = 100, min_delta = 0.00001)
    result = sorted(weighted_pagerank.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_weightedPR_w_norm.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()
    
    weighted_pagerank = algo.weighted_PR_wonorm(G, damping_factor = 0.85, max_iterations = 100, min_delta = 0.00001)
    s = sum(weighted_pagerank.values())
    for rank in weighted_pagerank:
        weighted_pagerank[rank] = weighted_pagerank[rank]*50.0/s
    result = sorted(weighted_pagerank.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_weightedPR_wo_norm.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()
#    
#     hits = algo.HITS(G, max_iterations = 100, min_delta = 0.00001)
#     result = sorted(hits.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_may28/me/extendedGwselfedges/cs_hits.csv","w")
#     for r in result:
#         if r[0] in top_50:
#             f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()
       
    hits = algo.weighted_HITS(G, max_iterations = 100, min_delta = 0.00001)
    result = sorted(hits.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_hits_weighted.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()
    
    hubavg = algo.hubavg_HITS(G, max_iterations = 100, min_delta = 0.00001)
    result = sorted(hubavg.iteritems(), key = lambda asd:asd[1], reverse = True)
    f = open("../result/result_may28/ee/comparison/ee_1992-2013_hits_hubavg.csv","w")
    for r in result:
        if r[0] in top_50:
            f.write("%s;%.5f\n" %(r[0], r[1]))
    f.close()

#     salsa = algo.SALSA(G)
#     result = sorted(salsa.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_top50_cs_newdata_apr09/result_top50_cs/univ_top50_cs_from2000_salsa.csv","w")
#     for r in result:
#         f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()
#       
#     salsa = algo.modified_SALSA(G)
#     result = sorted(salsa.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_top50_cs_extended/entire/univ_top40_me_from1946_to1990_salsa_modified.csv","w")
#     for r in result:
#         if r[0] in top_50:
#             f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()
#  
#     credit = algo.CreditPropagation(G, original_rank = hits, cr = 0.8, max_iterations = 10000, min_delta = 0.00001)
#     result = sorted(credit.iteritems(), key = lambda asd:asd[1], reverse = True)
#     f = open("../result/result_top50_cs_newdata_apr09/result_top50_cs_subtracted_woselfedge/univ_top50_cs_wo_selfedges_CreditProp_hits.csv","w")
#     for r in result:
#         if r[0] in top_50:
#             f.write("%s;%.5f\n" %(r[0], r[1]))
#     f.close()


    """ new experiments on authavg and weightedHITS_normalized @ May 13th """