def fetch_facebook_post_comments(self, graph, id, limit=20):
    """
    Fetches a post and its comments by post id and adds them to the graph.

    nodes:
        - post
        - comment
        - user (creator of each comment)
    edges:
        - comment -> post: COMMENTED
        - user -> comment: COMMENTED

    :param graph: The graph that receives the nodes and edges
    :param id: The id of the post to look up
    :param limit: Maximum number of comments to process, default 20
    """
    # look up the post together with its comments in a single request
    post = self.graph.get_object(
        id,
        fields=
        'full_picture,message,caption,created_time,comments{id,from,message,created_time}'
    )

    # create a new node for the post
    post_node = Post(post)
    graph.create_node(post_node)

    # The response may not carry a 'comments' entry at all (e.g. a post
    # nobody commented on); treat that as "no comments" rather than
    # failing with KeyError.
    comment_data = post.get('comments', {}).get('data', [])

    for count, comment in enumerate(comment_data):
        # stop once `limit` comments have been processed
        if count == limit:
            break

        # create a new node for the comment and link it to the post
        comment_node = Comment(comment)
        graph.create_node(comment_node)
        graph.create_edge(
            Edge(comment_node.get_id(), post_node.get_id(), "COMMENTED"))

        # create a new node for the comment's creator and link it to the comment
        user = {
            'id': comment_node.get_creator_id(),
            'name': comment_node.get_creator_name()
        }
        user_node = User(user)
        graph.create_node(user_node)
        graph.create_edge(
            Edge(user_node.get_id(), comment_node.get_id(), "COMMENTED"))
def fetch_tweet_by_id(self, graph, id):
    """
    Fetches a single tweet by id together with the user who created it.

    nodes:
        - tweet
        - user
    edges:
        - tweet -> user: CREATED_BY
        - user -> tweet: CREATED

    :param graph: The graph that receives the nodes and edges
    :param id: The id of the tweet
    """
    endpoint = "https://api.twitter.com/1.1/statuses/show.json?id=" + str(id)
    client = httplib2.Http()
    auth_headers = {'Authorization': 'Bearer ' + self.bearer_token}
    _, body = client.request(endpoint, method="GET", headers=auth_headers)
    payload = json.loads(body.decode())

    # Tweet node
    tweet_node = TwitterTweet(payload)
    graph.create_node(tweet_node)

    # Author node
    author_node = TwitterUser(payload["user"])
    graph.create_node(author_node)

    # Link tweet and author in both directions
    graph.create_edge(
        Edge(tweet_node.get_id(), author_node.get_id(), "CREATED_BY"))
    graph.create_edge(
        Edge(author_node.get_id(), tweet_node.get_id(), "CREATED"))
def fetch_posts_tagged(self, graph, tag, limit=20, before=0, filter=""):
    """
    Fetches posts carrying a given tag, plus the blog that published each one.

    nodes:
        - blog
        - post
    edges:
        - blog -> post: PUBLISHED

    :param graph: The graph that receives the nodes and edges
    :param tag: The tag to search for
    :param limit: Passed through to the tagged-posts request, default 20
    :param before: Passed through to the tagged-posts request, default 0
    :param filter: Passed through to the tagged-posts request, default ""
    """
    tagged_posts = self.tumblr.tagged(
        tag=tag, limit=limit, before=before, filter=filter)

    for tagged in tagged_posts:
        # Post node first, then look up and add its publishing blog
        graph.create_node(Post(tagged))
        publisher = self.tumblr.blog_info(tagged['blog_name'])['blog']
        graph.create_node(Blog(publisher))

        post_id = str(tagged['id'])
        graph.create_edge(Edge(publisher['name'], post_id, "PUBLISHED"))
def fetch_followers(self, graph, blog_name, limit=20, offset=0):
    """
    Fetches the blogs that follow the blog identified by "blog_name".

    nodes:
        - blog
    edges:
        - blog -> follower: FOLLOWER

    A KeyError raised while processing the response is printed together
    with the raw followers response instead of being propagated.

    :param graph: The graph that receives the nodes and edges
    :param blog_name: Identifier of the blog whose followers are fetched
    :param limit: Passed through to the followers request, default 20
    :param offset: Passed through to the followers request, default 0
    """
    raw_response = self.tumblr.followers(
        blog_name, limit=limit, offset=offset)
    try:
        follower_list = raw_response['users']
        target_blog = self.tumblr.blog_info(blog_name)['blog']
        target_name = target_blog['name']

        # Node for the followed blog, then one node + edge per follower
        graph.create_node(Blog(target_blog))
        for entry in follower_list:
            graph.create_node(Blog(entry))
            graph.create_edge(Edge(target_name, entry['name'], "FOLLOWER"))
    except KeyError as error:
        print(error)
        print(raw_response)
def fetch_published_posts(
        self, graph, blog_name, type=None, tag="", limit=20, offset=0):
    """
    Fetches the posts published by the blog identified by "blog_name".

    nodes:
        - blog
        - post
    edges:
        - blog -> post: PUBLISHED

    A KeyError raised while processing the response is printed together
    with the raw posts response instead of being propagated.

    :param graph: The graph that receives the nodes and edges
    :param blog_name: Identifier of the publishing blog
    :param type: Passed through to the posts request, default None
    :param tag: Passed through to the posts request, default ""
    :param limit: Passed through to the posts request, default 20
    :param offset: Passed through to the posts request, default 0
    """
    raw_response = self.tumblr.posts(
        blog_name, type=type, tag=tag, limit=limit, offset=offset)
    try:
        post_list = raw_response['posts']
        publisher = self.tumblr.blog_info(blog_name)['blog']
        graph.create_node(Blog(publisher))
        publisher_name = publisher['name']

        # One post node and one PUBLISHED edge per post
        for entry in post_list:
            graph.create_node(Post(entry))
            graph.create_edge(
                Edge(publisher_name, str(entry['id']), "PUBLISHED"))
    except KeyError as error:
        print(error)
        print(raw_response)
def fetch_liked_posts(
        self, graph, blog_name, limit=20, offset=0, before=0, after=0):
    """
    Fetches the posts liked by the blog identified by "blog_name".

    nodes:
        - blog
        - post
    edges:
        - blog -> post: LIKED

    A KeyError raised while processing the response is printed together
    with the raw likes response instead of being propagated.

    :param graph: The graph that receives the nodes and edges
    :param blog_name: Identifier of the blog whose likes are fetched
    :param limit: Passed through to the likes request, default 20
    :param offset: Passed through to the likes request, default 0
    :param before: Passed through to the likes request, default 0
    :param after: Passed through to the likes request, default 0
    """
    raw_response = self.tumblr.blog_likes(
        blog_name, limit=limit, offset=offset, before=before, after=after)
    try:
        liked_list = raw_response['liked_posts']
        blog_info = self.tumblr.blog_info(blog_name)['blog']

        # Node for the liking blog
        liker = Blog(blog_info)
        graph.create_node(liker)

        # One post node and one LIKED edge per liked post
        for entry in liked_list:
            liked = Post(entry)
            graph.create_node(liked)
            graph.create_edge(
                Edge(liker.get_id(), str(liked.get_id()), "LIKED"))
    except KeyError as error:
        print(error)
        print(raw_response)
def fetch_blogs_following(self, graph, blog_name, limit=20, offset=0):
    """
    Fetches the blogs followed by the blog identified by "blog_name".

    nodes:
        - blog
    edges:
        - blog -> followed blog: FOLLOWING

    A KeyError raised while processing the response is printed together
    with the raw following response instead of being propagated.

    :param graph: The graph that receives the nodes and edges
    :param blog_name: Identifier of the blog whose followings are fetched
    :param limit: Passed through to the following request, default 20
    :param offset: From which blog the fetch starts, default 0
    """
    raw_response = self.tumblr.blog_following(
        blog_name, limit=limit, offset=offset)
    try:
        followed_list = raw_response['blogs']
        source_blog = self.tumblr.blog_info(blog_name)['blog']
        source_name = source_blog['name']

        # Node for the source blog, then one node + edge per followed blog
        graph.create_node(Blog(source_blog))
        for entry in followed_list:
            graph.create_node(Blog(entry))
            graph.create_edge(Edge(source_name, entry['name'], "FOLLOWING"))
    except KeyError as error:
        print(error)
        print(raw_response)
def fecth_followers_by_screenname(self, graph, screenname, limit=15):
    """
    Fetches the followers of the Twitter user with the given screen name.

    nodes:
        - user
    edges:
        - follower -> user: FOLLOW
        - user -> follower: FOLLOWED BY

    :param graph: The graph that receives the nodes and edges
    :param screenname: Screen name of the user whose followers are fetched
    :param limit: Value sent as the request's "count" parameter, default 15
    """
    # Node for the followed user
    followed = self.get_single_user_by_screenname(graph, screenname)
    graph.create_node(followed)

    endpoint = ("https://api.twitter.com/1.1/followers/list.json?screen_name="
                + screenname + "&count=" + str(limit))
    client = httplib2.Http()
    auth_headers = {'Authorization': 'Bearer ' + self.bearer_token}
    _, body = client.request(endpoint, method="GET", headers=auth_headers)
    payload = json.loads(body.decode())

    # One node per follower, linked to the followed user in both directions
    for entry in payload["users"]:
        follower_node = TwitterUser(entry)
        graph.create_node(follower_node)
        graph.create_edge(
            Edge(follower_node.get_id(), followed.get_id(), "FOLLOW"))
        graph.create_edge(
            Edge(followed.get_id(), follower_node.get_id(), "FOLLOWED BY"))
def fetch_pinterest_board_by_url(self, graph, board_url):
    """
    Fetches a Pinterest board, its creator and the pins on the board.

    nodes:
        - board
        - user
        - pin
    edges:
        - board -> user: CREATED_BY
        - user -> board: CREATED
        - board -> pin: HAS
        - pin -> board: ON

    :param graph: The graph that receives the nodes and edges
    :param board_url: Board identifier handed to get_single_board and
        get_pins_from_board
    """
    # Board node
    board_data = self.get_single_board(board_url)["data"]
    board_node = PinterestBoard(board_data)
    graph.create_node(board_node)

    # Creator node; the username is the 4th '/'-separated piece of the
    # creator's url
    username = board_data["creator"]["url"].split('/')[3]
    creator_node = PinterestUser(self.get_single_user(username)["data"])
    graph.create_node(creator_node)
    graph.create_edge(
        Edge(board_node.get_id(), creator_node.get_id(), "CREATED_BY"))
    graph.create_edge(
        Edge(creator_node.get_id(), board_node.get_id(), "CREATED"))

    # Pin nodes; every pin is fetched individually by id
    pin_listing = self.get_pins_from_board(board_url)
    for listed in pin_listing["data"]:
        pin_node = PinterestPin(self.get_single_pin(listed["id"])["data"])
        graph.create_node(pin_node)
        graph.create_edge(Edge(board_node.get_id(), pin_node.get_id(), "HAS"))
        graph.create_edge(Edge(pin_node.get_id(), board_node.get_id(), "ON"))
def fetch_pinterest_my_boards(self, graph):
    """
    Fetches the authenticated Pinterest user and the boards under /me/boards.

    nodes:
        - user
        - board
    edges:
        - board -> user: CREATED_BY
        - user -> board: CREATED

    :param graph: The graph that receives the nodes and edges
    """
    def get_json(address):
        # Issue a GET with a fresh client and decode the JSON body
        _, body = httplib2.Http().request(address, method="GET")
        return json.loads(body.decode())

    base = "https://api.pinterest.com/v1/me/"
    token = self.access_token

    # User node
    me_url = base + "?access_token=" + token + \
        "&fields=first_name%2Cid%2Clast_name%2Curl%2Cbio%2Caccount_type%2Ccounts%2Ccreated_at%2Cimage%2Cusername"
    me_node = PinterestUser(get_json(me_url)["data"])
    graph.create_node(me_node)

    # Board nodes, linked to the user in both directions
    boards_url = base + "boards/?access_token=" + token + \
        "&fields=id%2Cname%2Curl%2Ccounts%2Ccreated_at%2Ccreator%2Cdescription%2Cimage%2Cprivacy"
    for entry in get_json(boards_url)["data"]:
        board_node = PinterestBoard(entry)
        graph.create_node(board_node)
        graph.create_edge(
            Edge(board_node.get_id(), me_node.get_id(), "CREATED_BY"))
        graph.create_edge(
            Edge(me_node.get_id(), board_node.get_id(), "CREATED"))
def fetch_pinterest_my_pins(self, graph):
    """
    Fetches the authenticated Pinterest user and the pins under /me/pins.

    nodes:
        - user
        - pin
    edges:
        - pin -> user: CREATED_BY
        - user -> pin: CREATED

    :param graph: The graph that receives the nodes and edges
    """
    def get_json(address):
        # Issue a GET with a fresh client and decode the JSON body
        _, body = httplib2.Http().request(address, method="GET")
        return json.loads(body.decode())

    base = "https://api.pinterest.com/v1/me/"
    token = self.access_token

    # User node
    me_url = base + "?access_token=" + token + \
        "&fields=first_name%2Cid%2Clast_name%2Curl%2Cbio%2Caccount_type%2Ccounts%2Ccreated_at%2Cimage%2Cusername"
    me_node = PinterestUser(get_json(me_url)["data"])
    graph.create_node(me_node)

    # Pin nodes, linked to the user in both directions
    pins_url = base + "pins/?access_token=" + token + \
        "&fields=id%2Clink%2Cnote%2Curl%2Cattribution%2Cboard%2Ccolor%2Coriginal_link%2Ccounts%2Ccreated_at%2Ccreator%2Cimage%2Cmedia"
    for entry in get_json(pins_url)["data"]:
        pin_node = PinterestPin(entry)
        graph.create_node(pin_node)
        graph.create_edge(
            Edge(pin_node.get_id(), me_node.get_id(), "CREATED_BY"))
        graph.create_edge(
            Edge(me_node.get_id(), pin_node.get_id(), "CREATED"))
def fetch_pinterest_pin_by_id(self, graph, pin_id):
    """
    Fetches a Pinterest pin together with its creator and its board.

    nodes:
        - pin
        - user
        - board
    edges:
        - pin -> user: CREATED_BY
        - user -> pin: CREATED
        - pin -> board: ON
        - board -> pin: HAS

    :param graph: The graph that receives the nodes and edges
    :param pin_id: The id of the pin
    """
    # Pin node
    pin_data = self.get_single_pin(pin_id)["data"]
    pin_node = PinterestPin(pin_data)
    graph.create_node(pin_node)

    # Creator node; the username is the 4th '/'-separated piece of the
    # creator's url
    username = pin_data["creator"]["url"].split('/')[3]
    creator_node = PinterestUser(self.get_single_user(username)["data"])
    graph.create_node(creator_node)
    graph.create_edge(
        Edge(pin_node.get_id(), creator_node.get_id(), "CREATED_BY"))
    graph.create_edge(
        Edge(creator_node.get_id(), pin_node.get_id(), "CREATED"))

    # Board node; the board is addressed by the 4th and 5th pieces of its
    # url joined with '/'
    url_pieces = pin_data["board"]["url"].split('/')
    board_path = url_pieces[3] + "/" + url_pieces[4]
    board_node = PinterestBoard(self.get_single_board(board_path)["data"])
    graph.create_node(board_node)
    graph.create_edge(Edge(pin_node.get_id(), board_node.get_id(), "ON"))
    graph.create_edge(Edge(board_node.get_id(), pin_node.get_id(), "HAS"))
def fetch_tweets_by_topic(self, graph, keyword, limit=15):
    """
    Searches tweets by keyword and adds them, their creators and any
    retweeted / quoted original tweets to the graph.

    nodes:
        - tweet
        - user
    edges:
        - tweet -> user: CREATED_BY
        - user -> tweet: CREATED
        - tweet -> original tweet: RETWEET / QUOTE
        - original tweet -> tweet: RETWEETED BY / QUOTED BY

    :param graph: The graph that receives the nodes and edges
    :param keyword: The raw (not yet URL-encoded) search query
    :param limit: Value sent as the request's "count" parameter, default 15
    """
    # Local import so this method stays self-contained.
    from urllib.parse import urlencode

    # Percent-encode the query: a raw keyword containing spaces, '#' or '&'
    # would otherwise produce a malformed URL or inject extra parameters.
    url = ("https://api.twitter.com/1.1/search/tweets.json?"
           + urlencode({"q": keyword, "count": limit}))
    http = httplib2.Http()
    headers = {'Authorization': 'Bearer ' + self.bearer_token}
    response, content = http.request(url, method="GET", headers=headers)
    result = json.loads(content.decode())

    # (key in the tweet json, edge tweet -> original, edge original -> tweet)
    references = (
        ('retweeted_status', "RETWEET", "RETWEETED BY"),
        ('quoted_status', "QUOTE", "QUOTED BY"),
    )

    for tweet in result["statuses"]:
        # Tweet node and its creator
        single_tweet = TwitterTweet(tweet)
        graph.create_node(single_tweet)
        creator = TwitterUser(tweet["user"])
        graph.create_node(creator)
        graph.create_edge(
            Edge(single_tweet.get_id(), creator.get_id(), "CREATED_BY"))
        graph.create_edge(
            Edge(creator.get_id(), single_tweet.get_id(), "CREATED"))

        # Original tweets this tweet retweets and/or quotes
        for key, forward, backward in references:
            if key not in tweet:
                continue
            original_tweet = TwitterTweet(tweet[key])
            graph.create_node(original_tweet)
            graph.create_edge(
                Edge(single_tweet.get_id(), original_tweet.get_id(), forward))
            graph.create_edge(
                Edge(original_tweet.get_id(), single_tweet.get_id(), backward))
def fetch_redditor_comments(self,
                            graph,
                            username,
                            limit=20,
                            sort="new",
                            time_filter="month"):
    """
    Fetches comments a redditor has posted.

    nodes:
        - redditor
        - comment
    edges:
        - redditor -> comment: REPLIED
        - comment -> redditor: COMMENT_CREATED_BY

    :param graph: The graph that receives the nodes and edges
    :param username: Name of the redditor
    :param limit: Sent as the "limit" request parameter, default 20
    :param sort: Sent as the "sort" request parameter, default "new"
    :param time_filter: Sent as the "t" request parameter, default "month"
    """
    endpoint = "https://oauth.reddit.com/user/" + username + "/comments"
    listing = self.get_request(
        endpoint, {"sort": sort, "limit": limit, "t": time_filter})

    # Redditor node
    author = self.request_info(USER_API_URL, username)
    graph.create_node(Redditor(author))
    author_id = author["id"]

    for child in listing["data"]["children"]:
        # entries of kind "more" are skipped
        if child["kind"] == "more":
            continue
        body = child["data"]

        # Comment node and its two edges
        graph.create_node(Comment(body))
        graph.create_edge(Edge(author_id, body["id"], "REPLIED"))
        graph.create_edge(Edge(body["id"], author_id, "COMMENT_CREATED_BY"))
def fetch_video_by_id_with_comments(self, graph, video_id):
    """
    This method fetches a video, its creator (channel node), its comment
    threads (top-level comments and replies) and the channel node of every
    comment author, and adds them to the graph

    :param graph: The graph we are passing in
    :param video_id: The id of the video
    """
    video_listing = self.youtube.videos().list(
        part='snippet,contentDetails,statistics', id=video_id).execute()
    video_item = video_listing['items'][0]

    # Video node
    graph.create_node(VideoNode(video_item))

    # Author (channel) of the video
    owner_id = video_item['snippet']['channelId']
    graph.create_node(self.create_node_by_channel_id(owner_id))
    graph.create_edge(Edge(owner_id, video_id, "createvideo"))

    # Comment threads of the video
    thread_listing = self.youtube.commentThreads().list(
        part="snippet,replies", videoId=video_id,
        textFormat="plainText").execute()

    for thread in thread_listing["items"]:
        top_comment = thread['snippet']['topLevelComment']
        top_id = top_comment['id']
        top_author_id = top_comment['snippet']['authorChannelId']['value']

        # Author and node of the top-level comment
        graph.create_node(self.create_node_by_channel_id(top_author_id))
        graph.create_node(CommentNode(top_comment))
        graph.create_edge(Edge(top_author_id, top_id, "comment"))
        graph.create_edge(Edge(top_id, video_id, "comment"))

        if 'replies' not in thread:
            continue

        for reply in thread['replies']['comments']:
            reply_id = reply['id']
            reply_author_id = reply['snippet']['authorChannelId']['value']

            # connect reply to its top-level comment
            graph.create_node(CommentNode(reply))
            graph.create_edge(Edge(reply_id, top_id, "comment"))

            # connect the reply's author to the reply
            graph.create_node(
                self.create_node_by_channel_id(reply_author_id))
            graph.create_edge(Edge(reply_author_id, reply_id, "comment"))

            # connect reply to the video
            graph.create_edge(Edge(reply_id, video_id, "comment"))
def fetch_pinterest_my_following_boards(self, graph):
    """
    Fetches the authenticated Pinterest user, the boards under
    /me/following/boards, each board's creator and each board's pins.

    nodes:
        - user
        - board
        - pin
    edges:
        - user -> board: FOLLOWING
        - board -> creator: CREATED_BY
        - creator -> board: CREATED
        - board -> pin: HAS
        - pin -> board: ON

    :param graph: The graph that receives the nodes and edges
    """
    def get_json(address):
        # Issue a GET with a fresh client and decode the JSON body
        _, body = httplib2.Http().request(address, method="GET")
        return json.loads(body.decode())

    base = "https://api.pinterest.com/v1/me/"
    token = self.access_token

    # User node
    me_url = base + "?access_token=" + token + \
        "&fields=first_name%2Cid%2Clast_name%2Curl%2Caccount_type%2Cusername%2Cbio%2Ccounts%2Ccreated_at%2Cimage"
    me_node = PinterestUser(get_json(me_url)["data"])
    graph.create_node(me_node)

    boards_url = base + "following/boards/?access_token=" + token + \
        "&fields=id%2Cname%2Curl%2Ccounts%2Ccreated_at%2Ccreator%2Cdescription%2Cimage%2Cprivacy"
    for entry in get_json(boards_url)["data"]:
        # Followed board node
        board_node = PinterestBoard(entry)
        graph.create_node(board_node)
        graph.create_edge(
            Edge(me_node.get_id(), board_node.get_id(), "FOLLOWING"))

        # Creator of the board; the username is the 4th '/'-separated
        # piece of the creator's url
        username = entry["creator"]["url"].split('/')[3]
        creator_node = PinterestUser(self.get_single_user(username)["data"])
        graph.create_node(creator_node)
        graph.create_edge(
            Edge(board_node.get_id(), creator_node.get_id(), "CREATED_BY"))
        graph.create_edge(
            Edge(creator_node.get_id(), board_node.get_id(), "CREATED"))

        # Pins on the board; the board is addressed by the 4th and 5th
        # pieces of its url joined with '/'
        url_pieces = entry["url"].split('/')
        board_path = url_pieces[3] + "/" + url_pieces[4]
        pin_listing = self.get_pins_from_board(board_path)
        for listed in pin_listing["data"]:
            pin_node = PinterestPin(
                self.get_single_pin(listed["id"])["data"])
            graph.create_node(pin_node)
            graph.create_edge(
                Edge(board_node.get_id(), pin_node.get_id(), "HAS"))
            graph.create_edge(
                Edge(pin_node.get_id(), board_node.get_id(), "ON"))
def process_positions(self, graph, profile):
    """
    This method processes the profile json, generates one position node
    per listed position and adds each node plus a "hasPosition" edge
    (profile id -> position id) to the graph

    :param graph: The graph we are passing in
    :param profile: The profile json; must contain "id" and "positions"
    """
    positions = profile["positions"]
    num = positions["_total"]

    # "values" is read defensively: it may be missing (e.g. when there are
    # no positions), and "_total" may be larger than the number of entries
    # actually present. Slicing caps the loop at whichever is smaller
    # instead of raising KeyError / IndexError.
    positions_array = positions.get("values", [])
    profile_id = str(profile["id"])

    for position in positions_array[:max(num, 0)]:
        position_node = LinkedinPosition(position)
        graph.create_node(position_node)
        graph.create_edge(
            Edge(profile_id, str(position["id"]), "hasPosition"))
def fetch_pinterest_my_following_users(self, graph):
    """
    Fetches the authenticated Pinterest user and the users under
    /me/following/users.

    nodes:
        - user
    edges:
        - user -> followed user: FOLLOWING

    :param graph: The graph that receives the nodes and edges
    """
    def get_json(address):
        # Issue a GET with a fresh client and decode the JSON body
        _, body = httplib2.Http().request(address, method="GET")
        return json.loads(body.decode())

    base = "https://api.pinterest.com/v1/me/"
    token = self.access_token

    # User node
    me_url = base + "?access_token=" + token + \
        "&fields=first_name%2Cid%2Clast_name%2Curl%2Caccount_type%2Cusername%2Cbio%2Ccounts%2Ccreated_at%2Cimage"
    me_node = PinterestUser(get_json(me_url)["data"])
    graph.create_node(me_node)

    # One node and one FOLLOWING edge per followed user
    following_url = base + "following/users/?access_token=" + token + \
        "&fields=first_name%2Cid%2Clast_name%2Curl%2Caccount_type%2Cusername%2Cbio%2Ccounts%2Ccreated_at%2Cimage"
    for entry in get_json(following_url)["data"]:
        followed_node = PinterestUser(entry)
        graph.create_node(followed_node)
        graph.create_edge(
            Edge(me_node.get_id(), followed_node.get_id(), "FOLLOWING"))
def fetch_facebook_user_liked_pages(self, graph, id, limit=20):
    """
    Fetches the pages a user liked, based on user id.

    nodes:
        - user
        - community (one per liked page)
    edges:
        - user -> community: LIKED

    :param graph: The graph that receives the nodes and edges
    :param id: The id of the user
    :param limit: Maximum number of liked pages to process, default 20
    """
    # search user's liked pages based on user id
    result = self.graph.get_object(
        id,
        fields=
        'name,likes{about,id,created_time,name,description,category,website}'
    )

    # search a user based on user id
    user = self.graph.get_object(
        id, fields='name,id,gender,birthday,hometown,email')

    # create a new node for the user
    user_node = User(user)
    graph.create_node(user_node)

    # The response may not carry a 'likes' entry at all (e.g. a user
    # without liked pages); treat that as an empty list rather than
    # failing with KeyError.
    like_data = result.get('likes', {}).get('data', [])

    # create a community node for every page the user liked
    for count, community in enumerate(like_data):
        # stop once `limit` pages have been processed
        if count == limit:
            break
        community_node = Community(community)
        graph.create_node(community_node)
        graph.create_edge(
            Edge(user_node.get_id(), community_node.get_id(), "LIKED"))
def fetch_videos_by_topic(self, graph, topic, maxResult=25):
    """
    This method searches videos for a topic and adds each video, its
    creator (channel node) and a "createvideo" edge to the graph

    :param graph: The graph we are passing in
    :param topic: The keyword for query
    :param maxResult: Max result for query, default 25
    """
    listing = self.youtube.search().list(
        q=topic, type='video', part='id,snippet',
        maxResults=maxResult).execute()

    for item in listing.get('items', []):
        # only results of kind youtube#video are processed
        if item['id']['kind'] != 'youtube#video':
            continue

        owner_id = item['snippet']['channelId']
        found_video_id = item['id']['videoId']

        graph.create_node(VideoNode(item))
        graph.create_node(self.create_node_by_channel_id(owner_id))
        graph.create_edge(Edge(owner_id, found_video_id, "createvideo"))
def fetch_facebook_community_feed(self, graph, id, limit=20):
    """
    Fetches the feed posts of a community, based on community id.

    nodes:
        - community
        - post
    edges:
        - community -> post: POSTED

    :param graph: The graph that receives the nodes and edges
    :param id: The id of the community
    :param limit: Maximum number of posts to process, default 20
    """
    # search the community's feed based on community id
    # NOTE(review): the requested field is spelled 'feeds' — confirm this
    # matches the field name the Graph API actually exposes.
    result = self.graph.get_object(
        id, fields='name,feeds{full_picture,message,caption,created_time}')

    # search a community based on community id
    community = self.graph.get_object(
        id, fields='about,id,created_time,name,description,category,website')

    # create a new node for the community
    community_node = Community(community)
    graph.create_node(community_node)

    # The response may not carry a 'feeds' entry at all; treat that as an
    # empty list rather than failing with KeyError.
    post_data = result.get('feeds', {}).get('data', [])

    # create a post node for every post in the feed
    for count, post in enumerate(post_data):
        # stop once `limit` posts have been processed
        if count == limit:
            break
        post_node = Post(post)
        graph.create_node(post_node)
        graph.create_edge(
            Edge(community_node.get_id(), post_node.get_id(), "POSTED"))
def fetch_playlists_by_topic(self, graph, topic, maxResult=25):
    """
    This method searches playlists for a topic and adds each playlist, its
    creator (channel node) and a "createPlaylist" edge to the graph

    :param graph: The graph we are passing in
    :param topic: The query topic
    :param maxResult: The max result number, default 25
    """
    listing = self.youtube.search().list(
        q=topic, type='playlist', part='id,snippet',
        maxResults=maxResult).execute()

    for item in listing.get('items', []):
        # only results of kind youtube#playlist are processed
        if item['id']['kind'] != 'youtube#playlist':
            continue

        found_playlist_id = item['id']['playlistId']
        graph.create_node(PlaylistNode(item))

        owner_id = item['snippet']['channelId']
        graph.create_node(self.create_node_by_channel_id(owner_id))
        graph.create_edge(
            Edge(owner_id, found_playlist_id, "createPlaylist"))
def fetch_facebook_user_created_post(self, graph, id, limit=20):
    """
    Fetches the posts a user created, based on user id.

    nodes:
        - user
        - post
    edges:
        - user -> post: POSTED

    :param graph: The graph that receives the nodes and edges
    :param id: The id of the user
    :param limit: Maximum number of posts to process, default 20
    """
    # search user's posts based on user id
    result = self.graph.get_object(
        id, fields='name,posts{full_picture,message,caption,created_time}')

    # search a user based on user id
    user = self.graph.get_object(
        id, fields='name,id,gender,birthday,hometown,email')

    # create a new node for the user
    user_node = User(user)
    graph.create_node(user_node)

    # The response may not carry a 'posts' entry at all (e.g. a user
    # without posts); treat that as an empty list rather than failing
    # with KeyError.
    post_data = result.get('posts', {}).get('data', [])

    # create a post node for every post the user created
    for count, post in enumerate(post_data):
        # stop once `limit` posts have been processed
        if count == limit:
            break
        post_node = Post(post)
        graph.create_node(post_node)
        graph.create_edge(
            Edge(user_node.get_id(), post_node.get_id(), "POSTED"))
def fetch_submission_comments(self,
                              graph,
                              submission_id,
                              limit=20,
                              sort="top"):
    """
    Fetches a submission, its subreddit, its author and its comments.

    nodes:
        - subreddit
        - submission
        - redditor
        - comment
    edges:
        - redditor -> submission: POSTED
        - submission -> redditor: SUBMISSION_CREATED_BY
        - submission -> subreddit: ON
        - subreddit -> submission: HAS_SUBMISSION
        - redditor -> comment: COMMENTED (parent is a submission) / REPLIED
        - comment -> redditor: COMMENT_CREATED_BY
        - comment -> parent: ON_POST (parent is a submission) / TO
        - parent -> comment: HAS_COMMENT (parent is a submission) / HAS_REPLY

    :param graph: The graph that receives the nodes and edges
    :param submission_id: The id of the submission
    :param limit: Sent as the "limit" request parameter, default 20
    :param sort: Sent as the "sort" request parameter, default "top"
    """
    endpoint = "https://oauth.reddit.com/comments/" + submission_id + "/"
    listings = self.get_request(endpoint, {"sort": sort, "limit": limit})

    # Submission Node (first listing of the response)
    post = listings[0]["data"]["children"][0]["data"]
    graph.create_node(Submission(post))

    # Subreddit Node
    sub = self.request_info(SUBREDDIT_API_URL, post["subreddit"])
    graph.create_node(Subreddit(sub))

    # Redditor Node (author of the submission)
    poster = self.request_info(USER_API_URL, post["author"])
    graph.create_node(Redditor(poster))

    # Edges around the submission
    graph.create_edge(Edge(poster["id"], post["id"], "POSTED"))
    graph.create_edge(
        Edge(post["id"], poster["id"], "SUBMISSION_CREATED_BY"))
    graph.create_edge(Edge(post["id"], sub["id"], "ON"))
    graph.create_edge(Edge(sub["id"], post["id"], "HAS_SUBMISSION"))

    # Comments (second listing of the response)
    for child in listings[1]["data"]["children"]:
        # entries of kind "more" are skipped
        if child["kind"] == "more":
            continue
        body = child["data"]

        # Redditor Node (author of the comment)
        commenter = self.request_info(USER_API_URL, body["author"])
        graph.create_node(Redditor(commenter))

        # Comment Node
        graph.create_node(Comment(body))

        # The first two characters of parent_id decide the edge labels:
        # "t3" means the parent is a submission. The parent's own id is
        # parent_id without its first three characters.
        if body["parent_id"][0:2] == "t3":
            authored, to_parent, from_parent = (
                "COMMENTED", "ON_POST", "HAS_COMMENT")
        else:
            authored, to_parent, from_parent = ("REPLIED", "TO", "HAS_REPLY")
        parent = body["parent_id"][3:]

        graph.create_edge(Edge(commenter["id"], body["id"], authored))
        graph.create_edge(
            Edge(body["id"], commenter["id"], "COMMENT_CREATED_BY"))
        graph.create_edge(Edge(body["id"], parent, to_parent))
        graph.create_edge(Edge(parent, body["id"], from_parent))
def fetch_redditor_submissions(self,
                               graph,
                               username,
                               limit=20,
                               sort="new",
                               time_filter="month"):
    """
    Fetches submissions a redditor has posted.

    nodes:
        - subreddit
        - submission
        - redditor
    edges:
        - redditor -> submission: POSTED
        - submission -> redditor: SUBMISSION_CREATED_BY
        - submission -> subreddit: ON
        - subreddit -> submission: HAS_SUBMISSION

    :param graph: The graph that receives the nodes and edges
    :param username: Name of the redditor
    :param limit: Number of submissions to collect, default 20
    :param sort: Sent as the "sort" request parameter, default "new"
    :param time_filter: Sent as the "t" request parameter, default "month"
    """
    url = USER_API_URL + username + "/submitted"
    params = {"sort": sort, "limit": limit, "t": time_filter}
    data = self.get_request(url, params)["data"]

    after = data["after"]
    total = int(data["dist"])
    # copy so that extending below does not mutate the response object
    submissions = list(data["children"])

    # Request further pages until the limit is reached. The loop also stops
    # when there is no `after` cursor left; concatenating a None cursor
    # into the URL used to raise TypeError.
    while total < limit and after:
        # Same "/submitted" endpoint as the first page, with the cursor
        # passed as a regular request parameter and only the remaining
        # number of items requested.
        params = {"limit": limit - total, "after": after}
        if sort != "hot":
            params["sort"] = sort
            params["t"] = time_filter
        data = self.get_request(url, params)["data"]
        after = data["after"]
        dist = int(data["dist"])
        if dist == 0:
            break
        total += dist
        # extend, not append: appending the page would nest a list inside
        # the submissions list and break the loop below
        submissions.extend(data["children"])

    # Redditor Node
    redditor = self.request_info(USER_API_URL, username)
    graph.create_node(Redditor(redditor))

    for submission in submissions:
        submission = submission["data"]

        # Subreddit Node
        subreddit = self.request_info(SUBREDDIT_API_URL,
                                      submission["subreddit"])
        graph.create_node(Subreddit(subreddit))

        # Redditor Node (author as reported by the submission itself)
        redditor = self.request_info(USER_API_URL, submission["author"])
        graph.create_node(Redditor(redditor))

        # Submission Node
        graph.create_node(Submission(submission))

        # Edges
        graph.create_edge(Edge(redditor["id"], submission["id"], "POSTED"))
        graph.create_edge(
            Edge(submission["id"], redditor["id"], "SUBMISSION_CREATED_BY"))
        graph.create_edge(Edge(submission["id"], subreddit["id"], "ON"))
        graph.create_edge(
            Edge(subreddit["id"], submission["id"], "HAS_SUBMISSION"))