Exemplo n.º 1
0
# Join POSTS with T on their "PostId" columns.
P = ringo.Join(POSTS, T, "PostId", "PostId")
t.show("join", P)

# Select Java posts.
# The result is not assigned and InPlace=False is not passed (unlike the
# selections below), so this presumably filters P in place -- TODO confirm.
# print(...) with one argument prints the same as the Python 2 statement.
print(ringo.GetSchema(P))
ringo.Select(P, "2.Tag = java", CompConstant=True)
t.show("select", P)

# Select questions: rows of P whose "1.AnswerId" is non-zero, returned as Q.
Q = ringo.Select(P, "1.AnswerId != 0", InPlace=False, CompConstant=True)
t.show("select", Q)

# Select answers: rows of P whose "1.AnswerId" is zero, returned as A.
A = ringo.Select(P, "1.AnswerId = 0", InPlace=False, CompConstant=True)
t.show("select", A)

# Disabled debugging aids, kept for reference:
#print ringo.DumpTableContent(Q,5)
#print ringo.DumpTableContent(A,5)
#ringo.GenerateProvenance(Q, '06-StackOverflow-paper-autogen-Q.py')
#ringo.GenerateProvenance(A, '06-StackOverflow-paper-autogen-A.py')

# Join questions with answers: Q's "1.AnswerId" against A's "1.PostId".
QA = ringo.Join(Q, A, "1.AnswerId", "1.PostId")
t.show("join", QA)

print(ringo.GetSchema(QA))

# Build the graph from QA over its "1.1.UserId" and "2.1.UserId" columns
# (presumably the two edge endpoints -- verify against ringo.ToGraph).
graph = ringo.ToGraph(QA, "1.1.UserId", "2.1.UserId")
t.show("graph", graph)
ringo.GenerateProvenance(graph, "06-StackOverflow-paper-autogen.py")
Exemplo n.º 2
0
import ringo
import sys

# Usage: python <this script> source
#   source: tab-separated input file whose columns match Schema below.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of an IndexError traceback.
    sys.exit("Usage: python %s source\n"
             "  source: input tab-separated posts file" % sys.argv[0])

src_file = sys.argv[1]
Schema = [('Id', 'int'), ('PostTypeId', 'int'), ('AcceptedAnswerId', 'int'),
          ('OwnerUserId', 'int'), ('Body', 'string'), ('Tag', 'string')]

# NOTE: this rebinds the name `ringo` from the imported module to a Ringo
# instance; the module itself is no longer reachable by that name afterwards.
ringo = ringo.Ringo()
P = ringo.LoadTableTSV(Schema, src_file, '\t', True)

# Keep every column except 'Body'. The result is discarded, so Project
# presumably modifies P in place -- TODO confirm against the Ringo API.
ringo.Project(P,
              ['Id', 'PostTypeId', 'AcceptedAnswerId', 'OwnerUserId', 'Tag'])

# Java posts, then split by PostTypeId (1 -> Q, 2 -> A). The trailing False
# is presumably the in-place flag, since each result is bound to a new name.
JP = ringo.Select(P, "Tag = 'java'", False)
Q = ringo.Select(JP, 'PostTypeId = 1', False)
A = ringo.Select(JP, 'PostTypeId = 2', False)

# Match each row of Q with the row of A whose 'Id' equals Q's
# 'AcceptedAnswerId', then build a graph over the two owner columns of the
# joined table ('OwnerUserId-1' / 'OwnerUserId-2').
QA = ringo.Join(Q, A, 'AcceptedAnswerId', 'Id')
G = ringo.ToGraph(QA, 'OwnerUserId-1', 'OwnerUserId-2')

PR_MAP = ringo.PageRank(G)  # a hash map object: node/user id -> PageRank score
PR = ringo.TableFromHashMap(PR_MAP, 'user', 'score')
PR = ringo.Order(PR, ['score'])
ringo.SaveTableTSV(PR, 'scores.tsv')
#ringo.SaveTableBinary(PR, 'scores')
ringo.GenerateProvenance(G, 'G.py')
Exemplo n.º 3
0
    except OSError:
        pass

# Rebind `ringo` to a Ringo instance; every call below goes through it.
ringo = ringo.Ringo()

t = testutils.Timer(ENABLE_TIMER)

# Load the tables stored under the names "t1" and "t3".
t1 = ringo.LoadTableBinary("t1")
t3 = ringo.LoadTableBinary("t3")

# Join t3 with t1, matching t3's "AnswerId" against t1's "PostId".
t4 = ringo.Join(t3, t1, "AnswerId", "PostId")
t.show("join", t4)

# Build the graph from the joined table over its two user-id columns.
graph = ringo.ToGraph(t4, "1_2.1.UserId", "1.UserId")
t.show("graph", graph)
ringo.ShowMetadata(graph)

# Get authority scores. GetHits yields a (hub, authority) pair; only the
# authority part is used in the visible remainder of the script.
HTHub, HTAuth = ringo.GetHits(graph)
t.show("hits", graph)
ringo.ShowMetadata(HTAuth)

# Convert the authority map into a table with columns "UserId", "Authority".
t5 = ringo.TableFromHashMap(HTAuth, "UserId", "Authority")
t.show("authority score", t5)

# Select top entries (currently disabled)
#ringo.Select(t5, 'Authority > 0.0', CompConstant = True)
#t.show("select", t5)
Exemplo n.º 4
0
  print """Usage: python use_case_1.py source [destination]
  source: input DBLP .tsv file
  destination: file where the coauthorship network should be stored"""
  exit(1)
# Command-line arguments: the input file is required, the output file for
# the graph is optional (None means "do not save").
srcfile = sys.argv[1]
dstfile = sys.argv[2] if len(sys.argv) >= 3 else None

# Rebind `ringo` to a Ringo instance; every call below goes through it.
ringo = ringo.Ringo()

t = testutils.Timer(ENABLE_TIMER)

# Two string columns per input row: a key and an author name.
S = [("Key", "string"), ("Author", "string")]
T = ringo.LoadTableTSV(S, srcfile)
t.show("load")

# Self-join on "Key"; presumably this pairs up authors that share a key,
# which is what the graph below is built from -- TODO confirm.
T = ringo.SelfJoin(T, "Key")
t.show("join")

# TODO: use simpler conventions for column renaming
# print(...) with one argument prints the same as the Python 2 statement.
print(ringo.GetSchema(T))
G = ringo.ToGraph(T, "1_1.Author", "1_2.Author")
t.show("graph")

# Save the graph only when a destination path was supplied.
if dstfile is not None:
  G.Save(snap.TFOut(dstfile))
  t.show("save")

diameter = ringo.GetBfsEffDiam(G, N_TEST_NODES)
t.show("diameter (%d test nodes)" % N_TEST_NODES)

print("Diameter: {0:.5f}".format(diameter))