# Join
# POSTS and T are matched on the "PostId" column each of them carries.
P = ringo.Join(POSTS, T, "PostId", "PostId")
t.show("join", P)

# Select Java posts
# NOTE(review): the return value is discarded here while the later selects
# pass InPlace=False -- presumably this call filters P itself; confirm.
print(ringo.GetSchema(P))
ringo.Select(P, '2.Tag = java', CompConstant=True)
t.show("select", P)

# Select Questions
Q = ringo.Select(
    P,
    '1.AnswerId != 0',
    InPlace=False,
    CompConstant=True,
)
t.show("select", Q)

# Select Answers
A = ringo.Select(
    P,
    '1.AnswerId = 0',
    InPlace=False,
    CompConstant=True,
)
t.show("select", A)

# Disabled debugging / provenance output, kept for reference:
#print ringo.DumpTableContent(Q,5)
#print ringo.DumpTableContent(A,5)
#ringo.GenerateProvenance(Q, '06-StackOverflow-paper-autogen-Q.py')
#ringo.GenerateProvenance(A, '06-StackOverflow-paper-autogen-A.py')

# Join
# Rows of Q are paired with rows of A where Q's "1.AnswerId" matches
# A's "1.PostId".
QA = ringo.Join(Q, A, "1.AnswerId", "1.PostId")
t.show("join", QA)
print(ringo.GetSchema(QA))

# Graph
# Built from the two user-id columns of the joined table.
graph = ringo.ToGraph(QA, "1.1.UserId", "2.1.UserId")
t.show("graph", graph)

ringo.GenerateProvenance(graph, '06-StackOverflow-paper-autogen.py')
import sys

import ringo

# Path of the tab-separated input file, taken from the first CLI argument.
src_file = sys.argv[1]

# (column name, column type) pairs describing the input table.
Schema = [
    ('Id', 'int'),
    ('PostTypeId', 'int'),
    ('AcceptedAnswerId', 'int'),
    ('OwnerUserId', 'int'),
    ('Body', 'string'),
    ('Tag', 'string'),
]

# From here on the name `ringo` refers to the Ringo instance, not the module.
ringo = ringo.Ringo()

# Load the table and keep every schema column except 'Body'.
P = ringo.LoadTableTSV(Schema, src_file, '\t', True)
ringo.Project(
    P,
    ['Id', 'PostTypeId', 'AcceptedAnswerId', 'OwnerUserId', 'Tag'],
)

# Rows tagged 'java', then split by PostTypeId.
# NOTE(review): presumably PostTypeId 1 = question and 2 = answer (the
# variables are named Q and A) -- confirm against the data set.
JP = ringo.Select(P, "Tag = 'java'", False)
Q = ringo.Select(JP, 'PostTypeId = 1', False)
A = ringo.Select(JP, 'PostTypeId = 2', False)

# Pair each row of Q with the row of A whose Id equals its AcceptedAnswerId,
# then build a graph over the two owner-id columns of the joined table.
QA = ringo.Join(Q, A, 'AcceptedAnswerId', 'Id')
G = ringo.ToGraph(QA, 'OwnerUserId-1', 'OwnerUserId-2')

# a hash map object: node/user id -> PageRank score
PR_MAP = ringo.PageRank(G)

# Turn the scores into a ('user', 'score') table, order it by 'score', save.
PR = ringo.TableFromHashMap(PR_MAP, 'user', 'score')
PR = ringo.Order(PR, ['score'])
ringo.SaveTableTSV(PR, 'scores.tsv')
#ringo.SaveTableBinary(PR, 'scores')

ringo.GenerateProvenance(G, 'G.py')
except OSError: pass ringo = ringo.Ringo() t = testutils.Timer(ENABLE_TIMER) t1 = ringo.LoadTableBinary('t1') t3 = ringo.LoadTableBinary('t3') # Join t4 = ringo.Join(t3, t1, "AnswerId", "PostId") t.show("join", t4) # Graph graph = ringo.ToGraph(t4, "1_2.1.UserId", "1.UserId") t.show("graph", graph) ringo.ShowMetadata(graph) # Get authority scores (HTHub, HTAuth) = ringo.GetHits(graph) t.show("hits", graph) ringo.ShowMetadata(HTAuth) t5 = ringo.TableFromHashMap(HTAuth, "UserId", "Authority") t.show("authority score", t5) # Select top entries #ringo.Select(t5, 'Authority > 0.0', CompConstant = True) #t.show("select", t5)
print """Usage: python use_case_1.py source [destination] source: input DBLP .tsv file destination: file where the coauthorship network should be stored""" exit(1) srcfile = sys.argv[1] dstfile = sys.argv[2] if len(sys.argv) >= 3 else None ringo = ringo.Ringo() t = testutils.Timer(ENABLE_TIMER) S = [("Key","string"), ("Author","string")] T = ringo.LoadTableTSV(S, srcfile) t.show("load") T = ringo.SelfJoin(T, "Key") t.show("join") # TODO: use simpler conventions for column renaming print ringo.GetSchema(T) G = ringo.ToGraph(T, "1_1.Author","1_2.Author") t.show("graph") if not dstfile is None: G.Save(snap.TFOut(dstfile)) t.show("save") diameter = ringo.GetBfsEffDiam(G,N_TEST_NODES) t.show("diameter (%d test nodes)" % N_TEST_NODES) print "Diameter: {0:.5f}".format(diameter)