def get_hits_venues():
    mapping = snap.TStrIntSH()
    t0 = time()
    file_output_1 = open("paper_venues_hits_hub.txt", 'w')
    file_output_2 = open("paper_venues_hits_auth.txt", 'w')
    G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt", 0, 1, mapping)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)
    print("HITS time:", round(time() - t0, 3), "s")
    for item in NIdHubH:
        file_output_1.write(str(mapping.GetKey(item)) + "," + str(NIdHubH[item]) + '\n')
    for item in NIdAuthH:
        file_output_2.write(str(mapping.GetKey(item)) + "," + str(NIdAuthH[item]) + '\n')
    # convert input string to node id:
    # NodeId = mapping.GetKeyId("814DF491")
    # convert node id back to input string:
    # NodeName = mapping.GetKey(NodeId)
    # print("name", NodeName)
    # print("id ", NodeId)
    print("finish hits!")
    file_output_1.close()
    file_output_2.close()
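A hedged follow-up sketch, assuming `mapping` and `NIdHubH` from the function above are still in scope: ranking venues by hub score in memory rather than post-processing the output file. The name `top_hubs` is hypothetical.

# Hypothetical follow-up: iterate the TIntFltH (it yields integer node ids,
# as in the loops above) and sort name/score pairs by score.
top_hubs = sorted(((mapping.GetKey(item), NIdHubH[item]) for item in NIdHubH),
                  key=lambda pair: pair[1], reverse=True)[:10]
for name, score in top_hubs:
    print(name, score)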
def load_graph(self, name):
    '''
    Helper function to load graphs.
    Check that the respective .txt files are in the same folder as this
    script; if not, change the paths below as required.
    '''
    # NOTE: the path comes from self.graphName; the name argument is unused here.
    mapping = snap.TStrIntSH()
    G = snap.LoadEdgeListStr(snap.PNGraph, self.graphName, 0, 1, mapping)
    return G
def hits(graph_filename):
    # create graph
    name_id_map = snap.TStrIntSH()
    graph = snap.LoadEdgeListStr(snap.PNGraph, graph_filename, 0, 1, name_id_map)
    # run HITS algo
    id_hub_map = snap.TIntFltH()
    id_auth_map = snap.TIntFltH()
    snap.GetHits(graph, id_hub_map, id_auth_map, 1000)  # iterate 1000 times
    return name_id_map, id_hub_map, id_auth_map
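A usage sketch for the helper above; the file name reuses paperid_venueid_ref.txt from the first snippet, and any two-column string edge list would work.

# Example call (file name assumed):
name_id_map, id_hub_map, id_auth_map = hits("paperid_venueid_ref.txt")
for item in id_hub_map:
    print(name_id_map.GetKey(item), id_hub_map[item], id_auth_map[item])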
def getDataSource(forceReload=False):
    dataSourceFileName = "scoring.pkl"
    import os.path
    if forceReload or not os.path.isfile(dataSourceFileName):
        Data_Scraper.load_data()
        print('before mapping')
        mapping = snap.TStrIntSH()
        G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
        rankedCommentsPos = []
        rankedCommentsNeg = []
        combinedNeg = []
        combinedPos = []
        # 1000000
        commentsToTake = 5000
        for i in range(1, 5):
            rankedCommentData = Snap_Analytics.sort_comments(60, mapping, i)
            rankedCommentsPos.append(rankedCommentData[:commentsToTake])
            rankedCommentsNeg.append(
                rankedCommentData[len(rankedCommentData) - commentsToTake * 4:len(rankedCommentData)])
            # use integer division so the slice indices stay ints under Python 3
            combinedPos.extend(rankedCommentData[:commentsToTake // 4])
            combinedNeg.extend(
                rankedCommentData[len(rankedCommentData) - (commentsToTake * 4) // 4:len(rankedCommentData)])
        rankedCommentsPos.append(combinedPos)
        rankedCommentsNeg.append(combinedNeg)
        f = open(dataSourceFileName, 'wb')
        pickle.dump((rankedCommentsPos, rankedCommentsNeg), f)
        f.close()
        return (rankedCommentsPos, rankedCommentsNeg)
    else:
        print("loading from file")
        f = open(dataSourceFileName, 'rb')
        classifier = pickle.load(f)
        f.close()
        return (classifier[0], classifier[1])
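A short usage sketch: scoring.pkl acts as a cache of the ranked comments, so passing `forceReload=True` (or deleting the file) triggers a rebuild from the raw data.

# Hypothetical usage: bypass the pickle cache and rebuild from scratch.
rankedCommentsPos, rankedCommentsNeg = getDataSource(forceReload=True)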
import math
import snap
import networkx as nx
import matplotlib.pyplot as plt

G1 = snap.LoadEdgeListStr(snap.PUNGraph, "new.txt", 1, 0)
total_no_of_communities = 0
community_list = [[0, 1, 2, 3], [4], [5, 6, 7]]
sym = [[[1, 0.8660254037844387], [3, 0.75], [2, 1.0]],
       [[0, 0.8660254037844387], [2, 0.8660254037844387]],
       [[0, 1.0], [1, 0.8660254037844387], [3, 0.75]],
       [[0, 0.75], [2, 0.75], [4, 0.5773502691896258]],
       [[3, 0.5773502691896258], [5, 0.5773502691896258]],
       [[4, 0.5773502691896258], [6, 0.8660254037844387], [7, 0.8660254037844387]],
       [[5, 0.8660254037844387], [7, 1.0]],
       [[6, 1.0], [5, 0.8660254037844387]]]
similarity_score = []
selected_seeds = []
priority_seeds = []
final_seed = []
degree_score = []
pq_candidate_nodes = []
hub_seeds = []

# total similarity of each node to its listed neighbours
for i in range(len(sym)):
    sum_of_sym = 0
    for j in range(len(sym[i])):
        sum_of_sym = sum_of_sym + sym[i][j][1]
    similarity_score.append(sum_of_sym)

for NI in G1.Nodes():
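The nested accumulation over `sym` can be collapsed into one comprehension; a sketch producing the same `similarity_score` list:

# Equivalent one-liner for the per-node similarity totals computed above:
similarity_score = [sum(pair[1] for pair in row) for row in sym]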
import snap

print("LoadEdgeListStr 1")
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1)

print("LoadEdgeListStr 2")
# the node-name mapping must be a string hash (TStrIntSH), not a TStrIntH
mapping = snap.TStrIntSH()
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1, mapping)
import snap
import numpy
import matplotlib.pyplot as plt

# Section 1 #
# Load data from file, then construct a directed graph
wiki_g = snap.LoadEdgeListStr(snap.PNGraph, "Wiki-Vote.txt", 0, 1)

# solution to hw
print('*' * 10 + ' Section I ' + '*' * 10)
print("The wiki-vote graph has " + str(wiki_g.GetNodes()) + " nodes.")
# self_loop_nodes = 0
# for edge in wiki_g.Edges():
#     if edge.GetSrcNId() == edge.GetDstNId():
#         self_loop_nodes = self_loop_nodes + 1
# Better to use built-in functions to count the self-edges...
print("The wiki-vote graph has " + str(snap.CntSelfEdges(wiki_g)) + " self-edges.")
print("The wiki-vote graph has " + str(snap.CntUniqDirEdges(wiki_g)) + " unique directed edges.")
print("The wiki-vote graph has " + str(snap.CntUniqUndirEdges(wiki_g)) + " unique undirected edges.")
# a node pair is reciprocated when both directions exist, so the count is
# (unique directed edges) - (unique undirected edges)
print("The wiki-vote graph has "
      + str(snap.CntUniqDirEdges(wiki_g) - snap.CntUniqUndirEdges(wiki_g))
      + " reciprocated edges.")

nodes_zero_out_degree = 0
nodes_zero_in_degree = 0
nodes_more_than_10_outgoing_edges = 0
nodes_fewer_than_10_incoming_edges = 0
min_out_degree = 1
max_out_degree = 1
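The counters initialized at the end of this snippet can also be computed directly with SNAP's built-in degree counters; a minimal sketch on the same graph:

# Alternative to the manual counters: built-in counts of nodes with a
# given out-/in-degree.
print("nodes with zero out-degree:", snap.CntOutDegNodes(wiki_g, 0))
print("nodes with zero in-degree:", snap.CntInDegNodes(wiki_g, 0))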
from benchmark import benchmark
import sys
import snap

filename = sys.argv[1]
n = int(sys.argv[2])

print(f"Profiling dataset {filename}")
print("Profiling loading")
print("=================")
print()
benchmark("snap.LoadEdgeListStr(snap.PNGraph, filename, 0, 1)", globals=globals(), n=n)

g = snap.LoadEdgeListStr(snap.PNGraph, filename, 0, 1)

print("Profiling 2-hops")
print("================")
print()
NodeVec = snap.TIntV()
benchmark("snap.GetNodesAtHop(g, 0, 2, NodeVec, True)", globals=globals(), n=n)

print("Profiling shortest path")
print("=======================")
print()
NIdToDistH = snap.TIntH()
benchmark("snap.GetShortPath(g, 0, NIdToDistH, True)", globals=globals(), n=n)
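The local `benchmark` module is not shown; below is a minimal stand-in inferred from the call sites above. Its signature and behavior are assumptions, not the original module.

# Hypothetical stand-in for the local benchmark helper: run stmt n times
# with timeit and report the average wall-clock time per run.
import timeit

def benchmark(stmt, globals=None, n=10):
    total = timeit.timeit(stmt, globals=globals, number=n)
    print(f"{stmt}: {total / n:.4f} s per run (average over {n} runs)")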
import snap

print("LoadEdgeListStr 1")
G1 = snap.LoadEdgeListStr(snap.PUNGraph, "data/test-509.txt", 0, 1)
print("nodes %d, edges %d" % (G1.GetNodes(), G1.GetEdges()))

print("LoadEdgeListStr 2")
# mapping = snap.TStrIntH()
mapping = snap.TStrInt64SH()
G2 = snap.LoadEdgeListStr(snap.PUNGraph, "data/test-509.txt", 0, 1, mapping)
print("nodes %d, edges %d" % (G2.GetNodes(), G2.GetEdges()))
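As the commented-out lines in the first snippet show, the string hash supports round-tripping between node names and internal ids; the node name used here is a placeholder assumption.

# Round-trip a node name through the mapping ("n0" is assumed to appear
# in data/test-509.txt):
node_id = mapping.GetKeyId("n0")
node_name = mapping.GetKey(node_id)
print("id", node_id, "name", node_name)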
def main():
    print('begin main')
    Data_Scraper.load_data()
    # measure_comment_lengths()
    # FK_histogram()
    print('before mapping')
    # for i in xrange(25):
    #     sample_random_comment(G, mapping)
    mapping = snap.TStrIntSH()
    G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
    root = getRootNode(mapping, G)
    # FK_scores = getFKScores()
    # """ TRYING TO CREATE A SMALL GRAPH SAMPLE FOR VISUALIZATION """
    # f = open("Mini_graph.csv", "w")
    # thread_id = random.sample(Data_Scraper.thread_ids, 1)
    # newRoot = G.GetNI(mapping.GetKeyId(thread_id)).GetId()
    # # newRoot = mapping.GetKeyId(thread_id)
    # f.write(str(root.GetId()) + "," + str(newRoot) + "\n")
    # sg = makeSubGraph(G, mapping)
    # for edge in sg.Edges():
    #     tup = edge.GetId()
    #     f.write(str(tup[0]) + "," + str(tup[1]) + "\n")
    # f.close()
    # print("File written")
    # for key, value in FK_scores.iteritems():
    #     f.write(str(key) + "\t" + str(value) + "\n")
    # f.close()
    # stats_vec = comment_statistics(mapping, G)
    # print(stats_vec[4])
    # getCommentHistogram([G.GetNI(n) for n in root.GetOutEdges()], G)
    # output = open('comment_stats.pkl', 'wb')
    # pickle.dump(stats_vec, output)
    # output.close()
    # sort_comments(200, mapping, 1)
    # print("Nodes: " + str(Data_Scraper.all_comments))
    # print("Threads: " + str(len(Data_Scraper.thread_ids)))
    # print("Root comments: " + str(len(Data_Scraper.root_comments)))
    # print("delay: " + estimated_delay(144834, mapping, G))
    # record_comment_lengths()
    assemble_sequence_dict(mapping)
    print(G.GetNodes())
    print(G.GetEdges())
nodes.append(node)

links = []
print("looping thru edges")
for i in G.Edges():
    edge = {}
    edge["source"] = mapping.GetKey(i.GetSrcNId())
    edge["target"] = mapping.GetKey(i.GetDstNId())
    edge["value"] = 1
    links.append(edge)

f = open("graph.json", "w")
f.write(json.dumps({"nodes": nodes, "links": links}))
f.close()

# TODO: mean and standard deviation of FK scores and word lengths

if __name__ == "__main__":
    main()
    mapping = snap.TStrIntSH()
    G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
    # root = getRootNode(mapping, G)
    # thread_sizes = {}
    # for thread in root.GetOutEdges():
    #     threadsize = _findDepth(G.GetNI(thread), G)
    #     thread_sizes[mapping.GetKey(thread)] = threadsize
def load_directed_graph(name, _type=None):
    print(f"--- Load directed graph {name} ---")
    if _type == "str":
        return snap.LoadEdgeListStr(snap.TNGraph, name, 0, 1)
    return snap.LoadEdgeList(snap.TNGraph, name, 0, 1)
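A hypothetical usage sketch; both file names are assumptions.

# Load an integer-id edge list, then a string-id edge list:
g_int = load_directed_graph("graph_int.txt")
g_str = load_directed_graph("graph_str.txt", _type="str")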
import snap

mapping = snap.TStrIntSH()
G0 = snap.LoadEdgeListStr(snap.PNEANet, "dataList.txt", 0, 1, mapping)
snap.SaveEdgeList(G0, "testGraph.txt", "Save as tab-separated list of edges")
snap.PrintInfo(G0)

"""
snap.DelDegKNodes(G0, 1, 0)
snap.DelDegKNodes(G0, 1, 0)
snap.DelDegKNodes(G0, 1, 0)
snap.DelDegKNodes(G0, 1, 0)
snap.PrintInfo(G0)

DegToCntV = snap.TIntPrV()
snap.GetOutDegCnt(G0, DegToCntV)
for item in DegToCntV:
    print("%d nodes with out-degree %d" % (item.GetVal2(), item.GetVal1()))
"""

"""
Components = snap.TCnComV()
snap.GetSccs(G0, Components)
for CnCom in Components:
    print("Size of component: %d" % CnCom.Len())
"""

"""
DegToCntV = snap.TIntPrV()