Пример #1
0
def get_hits_venues():
    """Run HITS on the paper/venue reference graph and dump both score tables.

    Loads ``paperid_venueid_ref.txt`` as a directed graph keyed by string node
    names (columns 0 and 1), runs ``snap.GetHits`` with 1000 as its iteration
    argument, prints the elapsed time, and writes one ``name,score`` line per
    node to ``paper_venues_hits_hub.txt`` and ``paper_venues_hits_auth.txt``.
    """
    mapping = snap.TStrIntSH()
    t0 = time()
    # Both outputs are still opened (and truncated) before the graph is
    # loaded; the ``with`` block guarantees they are closed even if loading,
    # HITS or a write fails part-way through.
    with open("paper_venues_hits_hub.txt", 'w') as hub_out, \
            open("paper_venues_hits_auth.txt", 'w') as auth_out:
        G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt",
                                  0, 1, mapping)
        NIdHubH = snap.TIntFltH()
        NIdAuthH = snap.TIntFltH()
        snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)
        print("HITS time:", round(time() - t0, 3), "s")

        # Iterating a score table yields node ids; mapping.GetKey() turns an
        # id back into the string name read from the input file.
        for node_id in NIdHubH:
            hub_out.write(
                "{},{}\n".format(mapping.GetKey(node_id), NIdHubH[node_id]))
        for node_id in NIdAuthH:
            auth_out.write(
                "{},{}\n".format(mapping.GetKey(node_id), NIdAuthH[node_id]))
        print("finish hits!")
    def load_graph(self, name):
        """Load the edge list at ``self.graphName`` as a directed snap graph.

        The file is read with string node names taken from columns 0 and 1.
        The name-to-id table built during loading is discarded; only the
        graph is returned.

        NOTE(review): ``name`` is accepted but never read here -- the path
        comes from ``self.graphName``. Confirm whether that is intended.
        """
        name_to_id = snap.TStrIntSH()
        return snap.LoadEdgeListStr(snap.PNGraph, self.graphName, 0, 1,
                                    name_to_id)
Пример #3
0
def hits(graph_filename):
    """Compute HITS hub and authority scores for a string-keyed edge list.

    Args:
        graph_filename: Path of the edge-list file; columns 0 and 1 hold the
            source and destination node names.

    Returns:
        A ``(name_id_map, id_hub_map, id_auth_map)`` tuple: the name-to-id
        table filled while loading, followed by the two score tables filled
        by ``snap.GetHits``.
    """
    # Output containers that the snap calls below fill in place.
    node_names = snap.TStrIntSH()
    hub_scores, authority_scores = snap.TIntFltH(), snap.TIntFltH()

    directed = snap.LoadEdgeListStr(snap.PNGraph, graph_filename, 0, 1,
                                    node_names)
    # 1000 is the iteration count handed to the HITS routine.
    snap.GetHits(directed, hub_scores, authority_scores, 1000)

    return node_names, hub_scores, authority_scores
Пример #4
0
def getDataSource(forceReload=False):
    """Return the ranked positive/negative comment lists, cached in a pickle.

    When ``forceReload`` is true or ``scoring.pkl`` does not exist, the data
    is rebuilt: for each ``i`` in 1..4 the result of
    ``Snap_Analytics.sort_comments(60, mapping, i)`` is sliced into a head
    ("positive") and a tail ("negative") portion, and a fifth, combined entry
    is appended to both lists. The result is written to ``scoring.pkl``.
    Otherwise the cached tuple is loaded from that file.

    Args:
        forceReload: Rebuild and overwrite the cache even if it exists.

    Returns:
        A ``(rankedCommentsPos, rankedCommentsNeg)`` tuple of lists of lists.
    """
    import os.path

    dataSourceFileName = "scoring.pkl"

    if not forceReload and os.path.isfile(dataSourceFileName):
        print("loading from file")
        # NOTE: pickle is only safe on trusted files; this cache is expected
        # to have been written by this function.
        with open(dataSourceFileName, 'rb') as f:
            cached = pickle.load(f)
        return (cached[0], cached[1])

    Data_Scraper.load_data()
    print('before mapping')
    mapping = snap.TStrIntSH()
    # Only the name-to-id mapping filled during loading is used below.
    snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1,
                         mapping)

    rankedCommentsPos = []
    rankedCommentsNeg = []
    combinedNeg = []
    combinedPos = []

    commentsToTake = 5000
    negToTake = commentsToTake * 4
    # Floor division keeps these usable as slice bounds on Python 3, where
    # ``/`` yields a float and slicing with it raises TypeError.
    combinedPosToTake = commentsToTake // 4
    combinedNegToTake = negToTake // 4

    for i in range(1, 5):
        rankedCommentData = Snap_Analytics.sort_comments(60, mapping, i)
        total = len(rankedCommentData)
        rankedCommentsPos.append(rankedCommentData[:commentsToTake])
        # Clamp the start at 0: a negative ``total - k`` would be read as an
        # offset from the end and select the wrong window on short lists.
        rankedCommentsNeg.append(
            rankedCommentData[max(0, total - negToTake):total])
        combinedPos.extend(rankedCommentData[:combinedPosToTake])
        combinedNeg.extend(
            rankedCommentData[max(0, total - combinedNegToTake):total])

    rankedCommentsPos.append(combinedPos)
    rankedCommentsNeg.append(combinedNeg)

    with open(dataSourceFileName, 'wb') as f:
        pickle.dump((rankedCommentsPos, rankedCommentsNeg), f)
    return (rankedCommentsPos, rankedCommentsNeg)
Пример #5
0
import math
import snap
import networkx as nx
import matplotlib.pyplot as plt

# Load "new.txt" as an undirected graph with string node names; column 1 is
# passed as the source column and column 0 as the destination column.
G1 = snap.LoadEdgeListStr(snap.PUNGraph, "new.txt", 1, 0)
total_no_of_communities = 0
# Hard-coded partition of the indices 0..7 into three groups.
community_list = [[0, 1, 2, 3], [4], [5, 6, 7]]
# One row per index 0..7; each row is a list of [int, float] pairs.
# Presumably [other-node index, similarity to that node] -- TODO confirm.
sym = [[[1, 0.8660254037844387], [3, 0.75], [2, 1.0]],
       [[0, 0.8660254037844387], [2, 0.8660254037844387]],
       [[0, 1.0], [1, 0.8660254037844387], [3, 0.75]],
       [[0, 0.75], [2, 0.75], [4, 0.5773502691896258]],
       [[3, 0.5773502691896258], [5, 0.5773502691896258]],
       [[4, 0.5773502691896258], [6, 0.8660254037844387],
        [7, 0.8660254037844387]], [[5, 0.8660254037844387], [7, 1.0]],
       [[6, 1.0], [5, 0.8660254037844387]]]
# Accumulators; only similarity_score is filled in the visible code.
similarity_score = []
selected_seeds = []
priority_seeds = []
final_seed = []
degree_score = []
pq_candidate_nodes = []
hub_seeds = []

# similarity_score[i] becomes the sum of the float component of every pair
# in sym[i].
for i in range(len(sym)):
    sum_of_sym = 0
    for j in range(len(sym[i])):
        sum_of_sym = sum_of_sym + sym[i][j][1]
    similarity_score.append(sum_of_sym)

for NI in G1.Nodes():
Пример #6
0
import snap

# Load the same edge list twice: first without, then with a name-to-id table.
# print() calls are used so the script also parses on Python 3.
print("LoadEdgeListStr 1")
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1)

print("LoadEdgeListStr 2")
# LoadEdgeListStr fills a string hash (TStrIntSH), not a plain TStrIntH.
mapping = snap.TStrIntSH()
companyEmployeeGraph = snap.LoadEdgeListStr(snap.PUNGraph, "c.txt", 0, 1,
                                            mapping)
Пример #7
0
import snap
import numpy
import matplotlib.pyplot as plt

# Section 1 #
# Load data from file, then construct a directed graph
wiki_g = snap.LoadEdgeListStr(snap.PNGraph, "Wiki-Vote.txt", 0, 1)
# solution to hw
print('*' * 10 + ' Section I ' + '*' * 10)
print("The wiki-vote graph has " + str(wiki_g.GetNodes()) + " nodes.")
# Self-edges are counted with the built-in helper rather than a manual pass
# over wiki_g.Edges() comparing GetSrcNId() with GetDstNId().
print("The wiki-vote graph has " + str(snap.CntSelfEdges(wiki_g)) +
      " self-looped nodes.")
unique_dir_edges = snap.CntUniqDirEdges(wiki_g)
unique_undir_edges = snap.CntUniqUndirEdges(wiki_g)
print("The wiki-vote graph has " + str(unique_dir_edges) +
      " unique directed edges.")
print("The wiki-vote graph has " + str(unique_undir_edges) +
      " unique undirected edges.")
# A reciprocated pair (a, b), (b, a) contributes two unique directed edges
# but only one unique undirected edge, so the difference counts the pairs.
# Halving the undirected count, as before, does not measure reciprocity.
# NOTE(review): assumes both helpers treat self-loops the same way -- confirm.
print("The wiki-vote graph has " +
      str(unique_dir_edges - unique_undir_edges) + " reciprocated edges.")

nodes_zero_out_degree = 0
nodes_zero_in_degree = 0
nodes_more_than_10_outgoing_edges = 0
nodes_fewer_than_10_incoming_edges = 0

min_out_degree = 1
max_out_degree = 1
Пример #8
0
import sys

# snap is used directly below and by the benchmarked expressions, which are
# evaluated against this module's globals, so it must be imported here.
import snap
from benchmark import benchmark

# Usage: <script> EDGE_LIST_FILE REPETITIONS
if len(sys.argv) < 3:
    sys.exit("usage: %s EDGE_LIST_FILE REPETITIONS" % sys.argv[0])

filename = sys.argv[1]
n = int(sys.argv[2])

print(f"Profiling dataset {filename}")

print("Profiling loading")
print("=================")
print()

benchmark("snap.LoadEdgeListStr(snap.PNGraph, filename, 0, 1)",
          globals=globals(),
          n=n)
# Keep one loaded graph around for the traversal benchmarks below.
g = snap.LoadEdgeListStr(snap.PNGraph, filename, 0, 1)

print("Profiling 2-hops")
print("================")
print()

# Output vector reused by every repetition of the benchmarked call.
NodeVec = snap.TIntV()
benchmark("snap.GetNodesAtHop(g, 0, 2, NodeVec, True)", globals=globals(), n=n)

print("Profiling shortest path")
print("=======================")
print()

# Output table reused by every repetition of the benchmarked call.
NIdToDistH = snap.TIntH()
benchmark("snap.GetShortPath(g, 0, NIdToDistH, True)", globals=globals(), n=n)
Пример #9
0
import snap

# Shared inputs for the two loads below.
EDGE_LIST_PATH = "data/test-509.txt"
SIZE_REPORT = "nodes %d, edges %d"

# Pass 1: load the undirected graph without a name-to-id table.
print("LoadEdgeListStr 1")
G1 = snap.LoadEdgeListStr(snap.PUNGraph, EDGE_LIST_PATH, 0, 1)
print(SIZE_REPORT % (G1.GetNodes(), G1.GetEdges()))

# Pass 2: load the same file again, this time filling a TStrInt64SH table.
print("LoadEdgeListStr 2")
mapping = snap.TStrInt64SH()
G2 = snap.LoadEdgeListStr(snap.PUNGraph, EDGE_LIST_PATH, 0, 1, mapping)
print(SIZE_REPORT % (G2.GetNodes(), G2.GetEdges()))
Пример #10
0
def main():
	"""Load the scraped data and the politics graph, then build the sequence dict.

	Calls ``Data_Scraper.load_data()``, loads ``politics_edge_list.txt`` as a
	directed graph with a name-to-id mapping, calls
	``assemble_sequence_dict(mapping)`` and prints the graph's node and edge
	counts.
	"""
	print('begin main')
	Data_Scraper.load_data()

	print('before mapping')
	mapping = snap.TStrIntSH()
	G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)

	# NOTE(review): the result is not used below; the call is kept in case
	# getRootNode has side effects -- confirm and drop it if it has none.
	root = getRootNode(mapping, G)

	assemble_sequence_dict(mapping)
	# print() calls rather than Python 2 print statements, matching the calls
	# at the top of this function and keeping the file parseable on Python 3.
	print(G.GetNodes())
	print(G.GetEdges())
Пример #11
0
		nodes.append(node)
	links = []
	print("looping thru edges")
	for i in G.Edges():
		edge = {}
		edge["source"] = mapping.GetKey(i.GetSrcNId())
		edge["target"] = mapping.GetKey(i.GetDstNId())
		edge["value"] = 1
		links.append(edge)
	f = open("graph.json", "w")
	f.write(json.dumps({"nodes": nodes, "links": links}))
	f.close()

	
# TODO: mean and standard deviation of FK - scores and word lengths

# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
	main()

# NOTE(review): this load sits at module level *after* the guard above, so it
# runs on every import and, when run as a script, only once main() has
# returned. Presumably other functions read the global `mapping`/`G` --
# confirm before moving or removing it.
mapping = snap.TStrIntSH()
G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
# root = getRootNode(mapping, G)
# thread_sizes = {}
# for thread in root.GetOutEdges():
# 		threadsize = _findDepth(G.GetNI(thread), G)
# 		thread_sizes[mapping.GetKey(thread)] = threadsize




Пример #12
0
def load_directed_graph(name, _type=None):
    """Load the edge list ``name`` as a directed ``snap.TNGraph``.

    Args:
        name: Path of the edge-list file; columns 0 and 1 are used.
        _type: Pass ``"str"`` to read node names as strings via
            ``snap.LoadEdgeListStr``; any other value uses
            ``snap.LoadEdgeList``.

    Returns:
        Whatever the selected snap loader returns for the file.
    """
    print(f"--- Load directed graph {name} ---")
    # Pick the loader once, then make a single call with the shared arguments.
    loader = snap.LoadEdgeListStr if _type == "str" else snap.LoadEdgeList
    return loader(snap.TNGraph, name, 0, 1)
Пример #13
0
import snap

# Input edge list and the file the loaded network is written back to.
EDGE_LIST_FILE = "dataList.txt"
SAVED_GRAPH_FILE = "testGraph.txt"

# Load the edge list as a PNEANet, filling the name-to-id table on the way.
mapping = snap.TStrIntSH()
G0 = snap.LoadEdgeListStr(snap.PNEANet, EDGE_LIST_FILE, 0, 1, mapping)

# Write the edges back out with a description string, then print a summary.
snap.SaveEdgeList(G0, SAVED_GRAPH_FILE, "Save as tab-separated list of edges")

snap.PrintInfo(G0)
"""

snap.DelDegKNodes(G0,1,0)
snap.DelDegKNodes(G0,1,0)
snap.DelDegKNodes(G0,1,0)
snap.DelDegKNodes(G0,1,0)

snap.PrintInfo(G0)


DegToCntV = snap.TIntPrV()
snap.GetOutDegCnt(G0, DegToCntV)
for item in DegToCntV:
    print "%d nodes with out-degree %d" % (item.GetVal2(), item.GetVal1())
"""
"""
Components = snap.TCnComV()
snap.GetSccs(G0, Components)
for CnCom in Components:
    print "Size of component: %d" % CnCom.Len()
"""
"""
DegToCntV = snap.TIntPrV()