Пример #1
0
def main(args):
	"""Run the per-user similarity self-join over the wiki-votes table.

	Loads a tab-separated (UserId, AdminId) table, calls
	``SelfSimJoinPerGroup`` on it, filters the result with an atomic ``NEQ``
	selection on the two UserId columns, dumps it and saves it.

	Args:
		args: argv-style sequence; ``args[1]`` is the votes input file and
			``args[2]`` the output file (``args[0]`` is not read, presumably
			the program name).
	"""
	if len(args) < 3:
		print(get_usage())
		sys.exit(1)

	# Take the paths from the validated ``args`` rather than ``sys.argv`` so
	# the length check above guards exactly the values used below.
	votes = args[1]
	outFile = args[2]

	t = testutils.Timer(ENABLE_TIMER)
	context = snap.TTableContext()

	# Schema of the input file: two integer columns.
	VoteS = snap.Schema()
	VoteS.Add(snap.TStrTAttrPr("UserId", snap.atInt))
	VoteS.Add(snap.TStrTAttrPr("AdminId", snap.atInt))
	# NOTE(review): the trailing TBool(False) presumably means "no header
	# line" -- confirm against the LoadSS signature.
	TVotes = snap.TTable.LoadSS("WikiVotes", VoteS, votes, context, '\t', snap.TBool(False))
	t.show("load Votes", TVotes)

	# Group by UserId and join on AdminId with the Jaccard measure and a 0.5
	# threshold; the measure is stored under DISTANCE_ATTRIBUTE.
	GroupBy = snap.TStrV()
	GroupBy.Add("UserId")
	JointTable = TVotes.SelfSimJoinPerGroup(GroupBy, "AdminId", DISTANCE_ATTRIBUTE, snap.Jaccard, 0.5)
	t.show("SimJoinPerGroup complete", JointTable)

	# Keep only rows whose two UserId columns differ.
	JointTable.SelectAtomic("WikiVotes_1.UserId", "WikiVotes_2.UserId", snap.NEQ)
	t.show("Select complete", JointTable)

	testutils.dump(JointTable, 20)
	JointTable.SaveSS(outFile)
Пример #2
0
def main():
	"""Filter the animals table with a compound predicate and dump it.

	The predicate tree is
	(Size == "big" AND Location == "Australia") OR Animal == Location.
	"""
	S = snap.Schema()
	context = snap.TTableContext()

	# Three string columns followed by one integer column.
	column_specs = (
		("Animal", snap.atStr),
		("Size", snap.atStr),
		("Location", snap.atStr),
		("Number", snap.atInt),
	)
	for column_name, column_type in column_specs:
		S.Add(snap.TStrTAttrPr(column_name, column_type))
	Animals = snap.TTable.LoadSS("Animals", S, "/dfs/ilfs2/0/ringo/tests/animals.txt", context, '\t', snap.TBool(False))

	def string_equals_const(column, constant):
		# Atomic predicate: string column compared for equality with a constant.
		return snap.TAtomicPredicate(snap.atStr, snap.TBool(True), snap.EQ, column, "", 0, 0, constant)

	# Every predicate and node stays bound to a local for the whole function,
	# exactly as in the straight-line version.

	# Leaf: size=big
	big_pred = string_equals_const("Size", "big")
	big_node = snap.TPredicateNode(big_pred)

	# Leaf: location=Australia
	australia_pred = string_equals_const("Location", "Australia")
	australia_node = snap.TPredicateNode(australia_pred)

	# size=big and location=Australia
	and_node = snap.TPredicateNode(snap.AND)
	and_node.AddLeftChild(big_node)
	and_node.AddRightChild(australia_node)

	# Leaf: Animal == Location (column-to-column, shows the non-constant case)
	same_name_pred = snap.TAtomicPredicate(snap.atStr, snap.TBool(False), snap.EQ, "Animal","Location")
	same_name_node = snap.TPredicateNode(same_name_pred)

	# (size=big and location=Australia) or Animal==Location
	or_node = snap.TPredicateNode(snap.OR)
	or_node.AddLeftChild(and_node)
	or_node.AddRightChild(same_name_node)
	full_predicate = snap.TPredicate(or_node)

	Animals.Select(full_predicate)
	testutils.dump(Animals)
Пример #3
0
def main(args):
    """Run the per-user similarity self-join over the wiki-votes table.

    Loads a tab-separated (UserId, AdminId) table, calls
    ``SelfSimJoinPerGroup`` on it, filters the result with an atomic ``NEQ``
    selection on the two UserId columns, dumps it and saves it.

    Args:
        args: argv-style sequence; ``args[1]`` is the votes input file and
            ``args[2]`` the output file (``args[0]`` is not read, presumably
            the program name).
    """
    if len(args) < 3:
        print(get_usage())
        sys.exit(1)

    # Take the paths from the validated ``args`` rather than ``sys.argv`` so
    # the length check above guards exactly the values used below.
    votes = args[1]
    outFile = args[2]

    t = testutils.Timer(ENABLE_TIMER)
    context = snap.TTableContext()

    # Schema of the input file: two integer columns.
    VoteS = snap.Schema()
    VoteS.Add(snap.TStrTAttrPr("UserId", snap.atInt))
    VoteS.Add(snap.TStrTAttrPr("AdminId", snap.atInt))
    # NOTE(review): the trailing TBool(False) presumably means "no header
    # line" -- confirm against the LoadSS signature.
    TVotes = snap.TTable.LoadSS("WikiVotes", VoteS, votes, context, '\t',
                                snap.TBool(False))
    t.show("load Votes", TVotes)

    # Group by UserId and join on AdminId with the Jaccard measure and a 0.5
    # threshold; the measure is stored under DISTANCE_ATTRIBUTE.
    GroupBy = snap.TStrV()
    GroupBy.Add("UserId")
    JointTable = TVotes.SelfSimJoinPerGroup(GroupBy, "AdminId",
                                            DISTANCE_ATTRIBUTE, snap.Jaccard,
                                            0.5)
    t.show("SimJoinPerGroup complete", JointTable)

    # Keep only rows whose two UserId columns differ.
    JointTable.SelectAtomic("WikiVotes_1.UserId", "WikiVotes_2.UserId",
                            snap.NEQ)
    t.show("Select complete", JointTable)

    testutils.dump(JointTable, 20)
    JointTable.SaveSS(outFile)
Пример #4
0
def main(args):
	"""Similarity self-join of Yelp businesses on their coordinates.

	Loads a tab-separated table (Name, City, State, Latitude, Longitude),
	calls ``SelfSimJoin`` on the two coordinate columns with the Haversine
	measure, projects the result down to the name/city/state of both sides
	plus the distance column, dumps it and saves it.

	Args:
		args: argv-style sequence; ``args[1]`` is the Yelp input file and
			``args[2]`` the output file (``args[0]`` is not read, presumably
			the program name).
	"""
	if len(args) < 3:
		print(get_usage())
		sys.exit(1)

	# Take the paths from the validated ``args`` rather than ``sys.argv`` so
	# the length check above guards exactly the values used below.
	yelp = args[1]
	outFile = args[2]

	t = testutils.Timer(ENABLE_TIMER)
	context = snap.TTableContext()

	# Three string columns followed by two float coordinate columns.
	YelpS = snap.Schema()
	YelpS.Add(snap.TStrTAttrPr("Name", snap.atStr))
	YelpS.Add(snap.TStrTAttrPr("City", snap.atStr))
	YelpS.Add(snap.TStrTAttrPr("State", snap.atStr))
	YelpS.Add(snap.TStrTAttrPr("Latitude", snap.atFlt))
	YelpS.Add(snap.TStrTAttrPr("Longitude", snap.atFlt))

	# NOTE(review): the trailing TBool(True) presumably means the file has a
	# header line -- confirm against the LoadSS signature.
	TYelp = snap.TTable.LoadSS("Yelp", YelpS, yelp, context, '\t', snap.TBool(True))
	t.show("load Yelp", TYelp)

	Cols = snap.TStrV()
	Cols.Add("Latitude")
	Cols.Add("Longitude")

	# Pair up businesses whose Haversine distance passes the threshold 2.
	# NOTE(review): this comment previously said "within 5 kilometers", which
	# does not match the value passed -- confirm the intended radius and unit.
	JointTable = TYelp.SelfSimJoin(Cols, DISTANCE_ATTRIBUTE, snap.Haversine, 2)
	t.show("SimJoin complete", JointTable)

	# Columns kept after the join: both sides' name/city/state + the distance.
	ProjectionV = snap.TStrV()
	ProjectionV.Add("Yelp_1.Name")
	ProjectionV.Add("Yelp_1.City")
	ProjectionV.Add("Yelp_1.State")
	ProjectionV.Add("Yelp_2.Name")
	ProjectionV.Add("Yelp_2.City")
	ProjectionV.Add("Yelp_2.State")
	ProjectionV.Add(DISTANCE_ATTRIBUTE)

	JointTable.ProjectInPlace(ProjectionV)
	# NOTE(review): unlike the other checkpoints, no table is passed here --
	# confirm Timer.show accepts a single argument.
	t.show("Project complete")

	testutils.dump(JointTable, 100)
	JointTable.SaveSS(outFile)
Пример #5
0
def main():
    """Filter the animals table with a compound predicate and dump it.

    The predicate tree is
    (Size == "big" AND Location == "Australia") OR Animal == Location.
    """
    S = snap.Schema()
    context = snap.TTableContext()

    # Three string columns followed by one integer column.
    column_specs = (
        ("Animal", snap.atStr),
        ("Size", snap.atStr),
        ("Location", snap.atStr),
        ("Number", snap.atInt),
    )
    for column_name, column_type in column_specs:
        S.Add(snap.TStrTAttrPr(column_name, column_type))
    source_path = "/dfs/ilfs2/0/ringo/tests/animals.txt"
    Animals = snap.TTable.LoadSS("Animals", S, source_path, context, '\t',
                                 snap.TBool(False))

    def string_equals_const(column, constant):
        # Atomic predicate: string column compared for equality with a
        # constant.
        return snap.TAtomicPredicate(snap.atStr, snap.TBool(True), snap.EQ,
                                     column, "", 0, 0, constant)

    # Every predicate and node stays bound to a local for the whole function,
    # exactly as in the straight-line version.

    # Leaf: size=big
    big_pred = string_equals_const("Size", "big")
    big_node = snap.TPredicateNode(big_pred)

    # Leaf: location=Australia
    australia_pred = string_equals_const("Location", "Australia")
    australia_node = snap.TPredicateNode(australia_pred)

    # size=big and location=Australia
    and_node = snap.TPredicateNode(snap.AND)
    and_node.AddLeftChild(big_node)
    and_node.AddRightChild(australia_node)

    # Leaf: Animal == Location (column-to-column, shows the non-constant case)
    same_name_pred = snap.TAtomicPredicate(snap.atStr, snap.TBool(False),
                                           snap.EQ, "Animal", "Location")
    same_name_node = snap.TPredicateNode(same_name_pred)

    # (size=big and location=Australia) or Animal==Location
    or_node = snap.TPredicateNode(snap.OR)
    or_node.AddLeftChild(and_node)
    or_node.AddRightChild(same_name_node)
    full_predicate = snap.TPredicate(or_node)

    Animals.Select(full_predicate)
    testutils.dump(Animals)
Пример #6
0
# Excerpt from a longer script: S, context, t, tagsFile and posts are defined
# above this point and are not visible here.

# Declare the two columns of the tags file (integer PostId, string Tag) and
# load it as table "t2".
# NOTE(review): the trailing TBool(False) presumably means "no header line" --
# confirm against the LoadSS signature.
S.Add(snap.TStrTAttrPr("PostId", snap.atInt))
S.Add(snap.TStrTAttrPr("Tag", snap.atStr))
tags = snap.TTable.LoadSS("t2", S, tagsFile, context, '\t', snap.TBool(False))
t.show("load tags", tags)

# Select: restrict tags to rows whose Tag column equals "python".
# >>> tags.select('Tag = "python"')
tags.SelectAtomicStrConst("Tag", "python", snap.EQ)
t.show("select", tags)

# Join: match posts and tags on their PostId columns.
# >>> questions = posts.join(tags)
questions = posts.Join("PostId", tags, "PostId")
t.show("join", questions)

testutils.dump(questions, 1)

# Project: keep three columns of the joined table, in place.
# The "t1." prefix presumably comes from the name of the posts table --
# verify against where posts is loaded.
# >>> questions.project(['PostId', 'UserId', 'AcceptedAnswerId'], in_place = True)
V = snap.TStrV()
V.Add("t1.PostId")
V.Add("t1.UserId")
V.Add("t1.AcceptedAnswerId")
questions.ProjectInPlace(V)
t.show("copy & project", questions)

# Rename: expose the t1.UserId column under the name "Asker".
# >>> questions.rename('UserId', 'Asker')
questions.Rename("t1.UserId", "Asker")
t.show("rename", questions)
Пример #7
0
# Excerpt from a longer script: S, context, t, tagsFile and posts are defined
# above this point and are not visible here.

# Declare the two columns of the tags file (integer PostId, string Tag) and
# load it as table "t2".
# NOTE(review): the trailing TBool(False) presumably means "no header line" --
# confirm against the LoadSS signature.
S.Add(snap.TStrTAttrPr("PostId", snap.atInt))
S.Add(snap.TStrTAttrPr("Tag", snap.atStr))
tags = snap.TTable.LoadSS("t2", S, tagsFile, context, '\t', snap.TBool(False))
t.show("load tags", tags)

# Select: restrict tags to rows whose Tag column equals "python".
# >>> tags.select('Tag = "python"')
tags.SelectAtomicStrConst("Tag", "python", snap.EQ)
t.show("select", tags)

# Join: match posts and tags on their PostId columns.
# >>> questions = posts.join(tags)
questions = posts.Join("PostId", tags, "PostId")
t.show("join", questions)

testutils.dump(questions, 1)

# Project: keep three columns of the joined table, in place.
# The "t1." prefix presumably comes from the name of the posts table --
# verify against where posts is loaded.
# >>> questions.project(['PostId', 'UserId', 'AcceptedAnswerId'], in_place = True)
V = snap.TStrV()
V.Add("t1.PostId")
V.Add("t1.UserId")
V.Add("t1.AcceptedAnswerId")
questions.ProjectInPlace(V)
t.show("copy & project", questions)

# Rename: expose the t1.UserId column under the name "Asker".
# >>> questions.rename('UserId', 'Asker')
questions.Rename("t1.UserId", "Asker")
t.show("rename", questions)
Пример #8
0
import sys
sys.path.append("../use-cases")
import snap
import testutils
import pdb

def _single_column(name):
    """Return a fresh snap.TStrV containing only the column *name*."""
    columns = snap.TStrV()
    columns.Add(name)
    return columns


# Schema with two integer columns, col1 and col2.
P1 = snap.TStrTAttrPr("col1", snap.atInt)
P2 = snap.TStrTAttrPr("col2", snap.atInt)
S = snap.Schema()
for _attr in (P1, P2):
    S.Add(_attr)

# Load the source table "1" from test2.tsv and show it.
Context = snap.TTableContext()
T1 = snap.TTable.LoadSS("1", S, "test2.tsv", Context)
testutils.dump(T1)

# Project each column of T1 into its own single-column table.
V = _single_column("col1")
T2 = T1.Project(V, "2")
testutils.dump(T2)

V = _single_column("col2")
T3 = T1.Project(V, "3")
testutils.dump(T3)

# Rename T3's column so both tables expose "col1", then union them into "4".
T3.Rename("col2", "col1")
T4 = T2.Union(T3, "4")
testutils.dump(T4)
# Excerpt from a longer script: t, graph, context, dstdir and
# OUTPUT_TABLE_FILENAME are defined above this point and are not visible here.
t.show("graph", graph)

# Get authority scores: GetHits fills the two hash maps passed to it.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
t.show("hits", graph)

# Turn the authority map into a two-column table (UserId, Authority).
t5 = snap.TTable.TableFromHashMap("t5", HTAuth, "UserId", "Authority", context,
                                  snap.TBool(False))
t.show("authority score", t5)

# Select top entries (currently disabled)
# >>> t.select('Authority > 0.0')
#t5.SelectAtomicFltConst("Authority", 0.0, snap.GT)
#t.show("select", t5)

# Order by final score (in descending order)
# >>> t5.order(['Authority'], desc = True)
V = snap.TStrV()
V.Add("Authority")
t5.Order(V, "", snap.TBool(False), snap.TBool(False))
t.show("order", t5)

# Save only when an output directory was supplied.
if dstdir is not None:
    t5.SaveSS(os.path.join(dstdir, OUTPUT_TABLE_FILENAME))
    t.show("save", t5)

testutils.dump(t5, 20)
# >>> t5.Count('CommentScore', 'UserId')
# >>> t5.Unique()
# Excerpt from a longer script: t5, t and destFile are defined above this
# point and are not visible here.
t5.Count("UserId", "ExpertCount")
# NOTE(review): interactive breakpoint left in the script; it stops the run
# until a debugger command is entered. Kept as-is -- confirm it is wanted.
pdb.set_trace()
V = snap.TStrV()
V.Add("UserId")
t5.Unique(V)
t.show("count", t5)

# NOTE(review): second interactive breakpoint, see the note above.
pdb.set_trace()

# Select
# >>> t5.select('ExpertCount >= 5')
t5.SelectAtomicIntConst("ExpertCount", 5, snap.GTE)
t.show("select", t5)

# Project
# >>> t5 = t5.project(['UserId'])
V = snap.TStrV()
V.Add("UserId")
t5.ProjectInPlace(V)
t.show("project", t5)

# Save only when an output file was supplied.
if destFile is not None:
    t5.SaveSS(destFile)
    t.show("save", t5)

testutils.dump(t5)

Пример #11
0
# Excerpt from a longer script: S, context, t, yearFile and authors are
# defined above this point and are not visible here.

# Declare the two columns of the year file (string Key, integer Year) and
# load it as table "2".
# NOTE(review): the trailing TBool(False) presumably means "no header line" --
# confirm against the LoadSS signature.
S.Add(snap.TStrTAttrPr("Key", snap.atStr))
S.Add(snap.TStrTAttrPr("Year", snap.atInt))
year = snap.TTable.LoadSS("2", S, yearFile, context, '\t', snap.TBool(False))
t.show("load year table", year)

# Select: restrict year to rows with Year >= 2005.
# >>> year.select('Year >= 2005')
year.SelectAtomicIntConst("Year", 2005, snap.GTE)
t.show("select", year)

# Join: match authors and year on their Key columns.
# >>> table = authors.join(year, ['Key'], ['Key'])
table = authors.Join("Key", year, "Key")
t.show("join", table)

testutils.dump(table, 2)

# Self-join the joined table on Key; the result replaces `table`.
# >>> table.selfjoin(table, ['Key'])
table = table.SelfJoin("Key")
t.show("join", table)

# Select: keep rows whose two Author columns differ.
# The "1_2_1.1." / "1_2_2.1." prefixes are presumably generated from the table
# names by the join and self-join -- verify against the actual column names.
# >>> table.select('Author_1 != Author_2')
table.SelectAtomic("1_2_1.1.Author", "1_2_2.1.Author", snap.NEQ)
t.show("select", table)

# Create network: mark the two Author columns as source and destination.
# >>> table.graph('Author_1', 'Author_2', directed=False)
table.SetSrcCol("1_2_1.1.Author")
table.SetDstCol("1_2_2.1.Author")
Пример #12
0
# Excerpt from a longer script: t, graph, context and destFile are defined
# above this point and are not visible here.
t.show("graph", graph)
#graph.Dump()

# Get authority scores: GetHits fills the two hash maps passed to it.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
t.show("hits", graph)

# Turn the authority map into a two-column table (UserId, Authority).
t5 = snap.TTable.TableFromHashMap("t5", HTAuth, "UserId", "Authority", context, snap.TBool(False))
t.show("authority score", t5)

# Select top entries (currently disabled)
# >>> t.select('Authority > 0.0')
#t5.SelectAtomicFltConst("Authority", 0.0, snap.GT)
#t.show("select", t5)

# Order by final score (in descending order)
# >>> t5.order(['Authority'], desc = True)
V = snap.TStrV()
V.Add("Authority")
t5.Order(V, "", snap.TBool(False), snap.TBool(False))
t.show("order", t5)

# Save only when an output file was supplied.
if destFile is not None:
    t5.SaveSS(destFile)
    t.show("save", t5)

testutils.dump(t5, 20)
Пример #13
0
import sys
sys.path.append("../use-cases")
import snap
import testutils
import pdb

def _single_column(name):
    """Return a fresh snap.TStrV containing only the column *name*."""
    columns = snap.TStrV()
    columns.Add(name)
    return columns


# Schema with two integer columns, col1 and col2.
P1 = snap.TStrTAttrPr("col1", snap.atInt)
P2 = snap.TStrTAttrPr("col2", snap.atInt)
S = snap.Schema()
for _attr in (P1, P2):
    S.Add(_attr)

# Load the source table "1" from test2.tsv and show it.
Context = snap.TTableContext()
T1 = snap.TTable.LoadSS("1", S, "test2.tsv", Context)
testutils.dump(T1)

# Project each column of T1 into its own single-column table.
V = _single_column("col1")
T2 = T1.Project(V, "2")
testutils.dump(T2)

V = _single_column("col2")
T3 = T1.Project(V, "3")
testutils.dump(T3)

# Rename T3's column so both tables expose "col1", then union them into "4".
T3.Rename("col2","col1")
T4 = T2.Union(T3, "4")
testutils.dump(T4)
Пример #14
0
import sys
sys.path.append("../utils")

import snap
import testutils

if __name__ == '__main__':
    # Debug script: load a one-column integer table, select the rows whose
    # Val equals 0 in place, and print the result size and first rows.

    srcfile = '/dfs/ilfs2/0/ringo/StackOverflow_joined/debug.tsv'

    context = snap.TTableContext()

    # print is called with a single parenthesized argument throughout, which
    # prints the same text on Python 2 and is valid syntax on Python 3.
    print("Loading table...")
    schema = snap.Schema()
    schema.Add(snap.TStrTAttrPr("Val", snap.atInt))
    table = snap.TTable.LoadSS("1", schema, srcfile, context, "\t", snap.TBool(False))

    print("Selecting rows with val == 0 in place...")
    table.SelectAtomicIntConst("Val", 0, snap.EQ)
    print("Number of rows in result: %d" % table.GetNumValidRows())
    print("10 first rows of table:")
    testutils.dump(table, 10)
# Excerpt from a longer script: t5, t, pdb and destFile are defined above this
# point and are not visible here.

# Count
# >>> t5.Count('CommentScore', 'UserId')
# >>> t5.Unique()
t5.Count("UserId", "ExpertCount")
# NOTE(review): interactive breakpoint left in the script; it stops the run
# until a debugger command is entered. Kept as-is -- confirm it is wanted.
pdb.set_trace()
V = snap.TStrV()
V.Add("UserId")
t5.Unique(V)
t.show("count", t5)

# NOTE(review): second interactive breakpoint, see the note above.
pdb.set_trace()

# Select
# >>> t5.select('ExpertCount >= 5')
t5.SelectAtomicIntConst("ExpertCount", 5, snap.GTE)
t.show("select", t5)

# Project
# >>> t5 = t5.project(['UserId'])
V = snap.TStrV()
V.Add("UserId")
t5.ProjectInPlace(V)
t.show("project", t5)

# Save only when an output file was supplied.
if destFile is not None:
    t5.SaveSS(destFile)
    t.show("save", t5)

testutils.dump(t5)