# Tail of the StackOverflow pipeline: build a graph from table t4, compute
# HITS scores, and export the authority scores as a table sorted in
# descending order.
# NOTE(review): the original statements were collapsed onto one physical line
# that started with '#', so none of them executed; the line structure below is
# reconstructed from the statement boundaries.

# Graph
graph = ringo.ToGraph(t4, "1_2.1.UserId", "1.UserId")
t.show("graph", graph)
ringo.ShowMetadata(graph)

# Get authority scores
# GetHits returns a (hub, authority) pair; only the authority map is used below.
(HTHub, HTAuth) = ringo.GetHits(graph)
t.show("hits", graph)
ringo.ShowMetadata(HTAuth)

# Turn the authority map into a two-column table (UserId, Authority).
t5 = ringo.TableFromHashMap(HTAuth, "UserId", "Authority")
t.show("authority score", t5)

# Select top entries
#ringo.Select(t5, 'Authority > 0.0', CompConstant = True)
#t.show("select", t5)

# Order by final score (in descending order)
ringo.Order(t5, ['Authority'], Asc=False)
t.show("order", t5)

# Save
# Only write the table when a destination directory was supplied.
# NOTE(review): the extent of this `if` body is reconstructed -- confirm that
# the t.show("save", ...) call belongs inside it.
if dstdir is not None:
    ringo.SaveTableTSV(t5, os.path.join(dstdir, OUTPUT_TABLE_FILENAME))
    t.show("save", t5)

ringo.DumpTableContent(t5, 20)
ringo.ShowMetadata(t5)

ringo.GenerateProvenance(t5, '06-StackOverflow-simple-autogen.py')
# Fragment of the GitHub collaboration pipeline: derive collaborator pairs from
# pull requests, merge them with the existing collaborator table, and run
# PageRank over the resulting graph.
# NOTE(review): the original statements were collapsed onto one physical line,
# so everything after the first '#' was swallowed into a comment; the line
# structure below is reconstructed from the statement boundaries.
t.show("merge collab", Tcollab_merge)

# If (u,v) worked on the same pull request on the same repository, they are added
# as (soft) collaborators.
Tpull_merge = ringo.SelfJoin(Tpull, "owner")
ringo.Select(Tpull_merge, "3_1.name = 3_2.name", True)
ringo.Select(Tpull_merge, "3_1.pullid = 3_2.pullid", True)
# Combine the two created_at columns into a single "created_at" column.
ringo.ColMin(Tpull_merge, "3_1.created_at", "3_2.created_at", "created_at")
ringo.Project(Tpull_merge, ("3_1.userid", "3_2.userid", "created_at"))
# Rename to userid1/userid2, the column names the later steps filter and graph on.
ringo.Rename(Tpull_merge, "3_1.userid", "userid1")
ringo.Rename(Tpull_merge, "3_2.userid", "userid2")
t.show("merge pull", Tpull_merge)

Tmerge = ringo.UnionAll(Tcollab_merge, Tpull_merge, "collab")

# Remove self-loops from the table.
ringo.Select(Tmerge, "userid1 != userid2")

# Select the base and delta tables from the merged table.
# NOTE(review): only the base table is selected here; no delta table is built
# in this fragment.
Tbase = ringo.Select(Tmerge, "created_at >= 10", False, True)

#TODO: Iterate over the rows and add (userid, owner) edge
# NOTE(review): unlike the other t.show calls, this one passes no table --
# confirm that is intended.
t.show("collab union")

# Convert base table to base graph
Gbase = ringo.ToGraph(Tbase, "userid1", "userid2")
t.show("base graph", Gbase)

# Rank the nodes of the base graph, order by PAGE_RANK_ATTRIBUTE, and print.
TPageRank = ringo.PageRank(Gbase)
TPageRank = ringo.Order(TPageRank, PAGE_RANK_ATTRIBUTE)
ringo.DumpTableContent(TPageRank)
# Script: load a TSV of posts, keep the rows tagged 'java', join rows with
# PostTypeId 1 to rows with PostTypeId 2 on AcceptedAnswerId = Id, build a
# graph over the two OwnerUserId columns, and save the PageRank scores to
# 'scores.tsv'.
# (Presumably PostTypeId 1 = question and 2 = answer -- TODO confirm.)
#
# Usage: <script> <posts.tsv>
import sys

import ringo

# Fail with a usage hint instead of a bare IndexError when the path is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s <posts.tsv>' % sys.argv[0])
src_file = sys.argv[1]

# Column names and types of the input table, in file order.
Schema = [
    ('Id', 'int'),
    ('PostTypeId', 'int'),
    ('AcceptedAnswerId', 'int'),
    ('OwnerUserId', 'int'),
    ('Body', 'string'),
    ('Tag', 'string'),
]

# From here on the name `ringo` refers to the Ringo engine instance, not the module.
ringo = ringo.Ringo()

P = ringo.LoadTableTSV(Schema, src_file, '\t', True)
# Drop the 'Body' column; it is not used by any later step.
ringo.Project(P, ['Id', 'PostTypeId', 'AcceptedAnswerId', 'OwnerUserId', 'Tag'])

JP = ringo.Select(P, "Tag = 'java'", False)
Q = ringo.Select(JP, 'PostTypeId = 1', False)
A = ringo.Select(JP, 'PostTypeId = 2', False)

# Pair each Q row with the A row whose Id equals its AcceptedAnswerId.
QA = ringo.Join(Q, A, 'AcceptedAnswerId', 'Id')
G = ringo.ToGraph(QA, 'OwnerUserId-1', 'OwnerUserId-2')

PR_MAP = ringo.PageRank(G)  # a hash map object: node/user id -> PageRank score
PR = ringo.TableFromHashMap(PR_MAP, 'user', 'score')
PR = ringo.Order(PR, ['score'])

ringo.SaveTableTSV(PR, 'scores.tsv')
#ringo.SaveTableBinary(PR, 'scores')

ringo.GenerateProvenance(G, 'G.py')