def main():
    """Build the subreddit graph from the database and save it as GML.

    Each row from ``db.get_subreddits()`` becomes one vertex whose ``name``
    property is ``row[0]`` and whose ``subscribers`` property is ``row[1]``.
    Each ``(v1, v2)`` pair from ``db.get_all_links()`` becomes one edge
    between the vertices carrying those names. The graph is undirected and
    is written to ``data/reddit.gml``.

    Raises:
        KeyError: if a link names a subreddit that ``db.get_subreddits()``
            did not return.
    """
    g = Graph()
    g.set_directed(False)

    db = DBModel.get()
    try:
        g.vertex_properties["subscribers"] = g.new_vertex_property("int")
        g.vertex_properties["name"] = g.new_vertex_property("string")
        # Look the property maps up once instead of once per vertex.
        subscribers = g.vertex_properties["subscribers"]
        names = g.vertex_properties["name"]

        # Maps subreddit name -> vertex, so links can be resolved by name.
        vertices = {}

        # add all vertices to the graph
        for subreddit in db.get_subreddits():
            v = g.add_vertex()
            names[v] = subreddit[0]
            subscribers[v] = subreddit[1]
            vertices[subreddit[0]] = v

        # add all the edges
        for v1, v2 in db.get_all_links():
            g.add_edge(vertices[v1], vertices[v2])
    finally:
        # Release the database handle even if building the graph fails.
        db.close()

    g.save("data/reddit.gml")
def main():
    """Fetch comments from "all" in an endless loop and record each one.

    For every comment returned by ``r.get_comments("all", limit=None)`` the
    author name and the subreddit, formatted as ``/r/<lowercased name>/``,
    are passed to ``model.save_comment``. The loop runs until interrupted
    with Ctrl-C, which prints an exit message. The database handle is closed
    on every exit path, including unexpected errors.
    """
    model = DBModel.get()
    try:
        print("[ ] Connecting..")
        r = praw.Reddit(user_agent="/u/benediktkr/")

        print("[ ] Logging in..")
        # NOTE(review): credentials are hard-coded here; consider loading
        # them from configuration instead.
        r.login(username="******", password="******")

        print("[ ] Starting..")
        while True:
            # No explicit delay between rounds: per the original author's
            # note, praw follows the guidelines on ratelimits.
            print(" [ ] Fetching comments..")
            # Materialised as a list so the count can be reported.
            comments = list(r.get_comments("all", limit=None))
            print(" [+] Fetched: {0} comments.".format(len(comments)))

            print(" [ ] Processing")
            for comment in comments:
                username = str(comment.author)
                subreddit = "/r/" + str(comment.subreddit).lower() + "/"
                model.save_comment(username, subreddit)
            print(" [+] Done")
    except KeyboardInterrupt:
        print("\n[!]Exiting")
    finally:
        # Previously only reached on Ctrl-C; any other failure leaked it.
        model.close()
def bfs(start_node):
    """Crawl subreddits breadth-first from *start_node* and persist them.

    Each subreddit taken from the queue is parsed with ``parse_sidebar`` and
    stored via ``db.save_subreddit``. Every entry of the parsed result's
    ``'links'`` is printed and stored as a ``(source, target)`` pair via
    ``db.save_link``, and is queued for its own visit unless it has already
    been visited.

    Args:
        start_node: identifier of the subreddit to start from, in whatever
            form ``parse_sidebar`` accepts.
    """
    # Nothing is written to this file; opening it in 'w' mode only
    # creates or truncates it. The ``with`` guarantees it is closed.
    with open('data/reddit.txt', 'w'):
        db = DBModel.get()
        try:
            # Seed the queue with the argument (was the undefined ``start``).
            todo = deque([start_node])
            visited = set()

            while todo:
                here = todo.popleft()
                # A node can be queued several times before its first visit.
                if here in visited:
                    continue
                visited.add(here)

                this_subreddit = parse_sidebar(here)
                db.save_subreddit(this_subreddit)

                for subreddit in this_subreddit['links']:
                    if subreddit not in visited:
                        todo.append(subreddit)
                    # Record the edge even when the target was already seen.
                    link = (here, subreddit)
                    print(link)
                    db.save_link(link)
        finally:
            # Release the database handle even if parsing or saving fails.
            db.close()