def build_graph_level1():
    khiem_id = 557374910
    minh_le_id = 19104945
    nguyenthaiha_id = 47043030
    users_l1 = []
    khiem = api.GetUser(khiem_id)
    minh_le = api.GetUser(minh_le_id)
    nguyenthaiha = api.GetUser(nguyenthaiha_id)
    #users_l1.extend([khiem, minh_le, nguyenthaiha])
    list1 = findTopFriends(userid=khiem_id)
    list2 = findTopFriends(userid=minh_le_id)
    list3 = findTopFriends(userid=nguyenthaiha_id)
    users_l1.extend(list1)
    users_l1.extend(list2)
    users_l1.extend(list3)
    data = []
    followings = []
    for i in range(0, len(users_l1)):
        user = users_l1[i]
        print "User " + user.name + " has " + str(user.followers_count) + " followers"
        data.append(user.AsDict())
    for u in list1:
        followings.append({"user_id": khiem_id, "friend_id": u.id})
    for u in list2:
        followings.append({"user_id": minh_le_id, "friend_id": u.id})
    for u in list3:
        followings.append({"user_id": nguyenthaiha_id, "friend_id": u.id})
    datastore.store(data, "twitter_users_level1_new", "Twitter")
    datastore.store(followings, "twitter_followings_level1", "Twitter")
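# Note: findTopFriends() is called by several snippets here but never defined.
# A hypothetical sketch of what it is assumed to do (rank a user's friends by
# follower count with python-twitter and keep the top few); the exact
# GetFriends signature and the `limit` default depend on the python-twitter
# version in use and are assumptions, not the original implementation:
def findTopFriends(userid, limit=20):
    friends = api.GetFriends(user=userid)  # assumed python-twitter call
    friends.sort(key=lambda u: u.followers_count, reverse=True)
    return friends[:limit]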
def get_followings():
    users = list(datastore.load('khiem.twitter_users'))
    user_ids = []
    followings = []
    # collect the ids of every known user so followers can be matched against them
    for i in range(0, len(users)):
        user_ids.append(users[i]['id'])
    for i in range(500, 751):
        user = users[i]
        #friend_ids = api.GetFriendIDs()
        #for friend_id in friend_ids:
        #    if friend_id in user_ids:
        #        followings.append({"user_id": user["id"], "friend_id": friend_id})
        try:
            follower_ids = api.GetFollowerIDs(userid=user["id"])["ids"]
            print str(i) + ": Found " + str(len(follower_ids)) + " followers of " + user['name']
            for follower_id in follower_ids:
                if follower_id in user_ids:
                    followings.append({"user_id": follower_id, "friend_id": user['id']})
        except Exception as e:
            print e
            continue
    datastore.store(followings, 'twitter_followings_500_750', 'Twitter followings')
    print "Successfully persist followings: " + str(len(followings)) + " items"
def rebuild_tweets():
    tweets = list(datastore.load('twitter_tweets'))
    new_tweets = []
    for tweet in tweets:
        urls = tweet['urls']
        hashtags = tweet['hashtags']
        if urls is not None and urls != '':
            try:
                urls = ast.literal_eval(urls)
                tweet['urls'] = json.dumps(urls)
            except Exception as e:
                traceback.print_exc()
                tweet['urls'] = ''
        if hashtags is not None and hashtags != '':
            try:
                hashtags = ast.literal_eval(hashtags)
                tweet['hashtags'] = json.dumps(hashtags)
            except Exception as e:
                traceback.print_exc()
                tweet['hashtags'] = ''
        new_tweets.append(tweet)
    datastore.store(new_tweets, 'twitter_tweets_cleaned', '')
def get_friends():
    print "============================================="
    user_list = []
    user_ids = []
    followings = []
    users = api.GetFriends()
    print "You're following " + str(len(users)) + " users"
    print [u.name for u in users]
    print "=============================================="
    print "Friends' tweets"
    #for u in users:
    #    statuses = api.GetUserTimeline(u.screen_name)
    #    #print [s.text for s in statuses]
    #    print u.name + " has " + str(len(statuses)) + " posts recently."
    for u in users:
        user_list.append(u.AsDict())
        user_ids.append(u.id)
        followings.append({"user_id": me.id, "friend_id": u.id})
    for u in users:
        friends = api.GetFriends(u.screen_name)
        #user_list.extend(friends)
        for friend in friends:
            followings.append({"user_id": u.id, "friend_id": friend.id})
            if friend.id not in user_ids:
                user_list.append(friend.AsDict())
                user_ids.append(friend.id)
        print u.name + " has " + str(len(friends)) + " friends"
    print "Total users " + str(len(user_list))
    datastore.store(user_list, 'twitter_users', 'Twitter users')
def getTweets(start, stop):
    tweet_entities = ["id", "truncated", "hashtags", "urls", "user_mentions",
                      "text", "source", "retweet_count", "created_at", "user",
                      "favorited"]
    users = list(datastore.load('twitter_users_cleaned'))
    total_users = len(users)
    if (stop - start > 300):
        print "Max (stop-start) is 300"
        return
    users = users[start:stop]
    data = []
    index = start
    try:
        for u in users:
            print str(index) + ": Getting tweets for user " + u['name']
            try:
                tweets = getTopTweets(userid=int(u['id']))
                if tweets is not None:
                    print "Fetched " + str(len(tweets)) + " tweets of user " + u['name']
                    for t in tweets:
                        st = t.AsDict()
                        obj = {}
                        for prop in tweet_entities:
                            if prop not in st:
                                if prop == "retweet_count" or prop == "favorited" or prop == "truncated":
                                    obj[prop] = 0
                                else:
                                    obj[prop] = ''
                            elif prop == "user":
                                user_obj = dict(st["user"])
                                obj['user_id'] = user_obj['id']
                                obj['user_screen_name'] = user_obj['screen_name']
                                obj['user_name'] = user_obj['name']
                            else:
                                obj[prop] = st[prop]
                        data.append(obj)
                else:
                    print "Empty tweets for user " + u['name']
            except Exception as e:
                print 'Exception occurs when fetch tweets for user ' + u['name']
                traceback.print_exc()
            index = index + 1
    except Exception as e:
        traceback.print_exc()
        print str(datetime.now())
        return
    datastore.store(data, 'twitter_tweets_cleaned_' + str(start) + '_' + str(stop - 1), '')
    print str(datetime.now()) + ' Result at segment ' + str(start) + ' -> ' + str(stop - 1) + '/' + str(total_users) + ': Total ' + str(len(data)) + ' tweets'
def save(self, datastore):
    """ Save the item """
    app.logger.debug("Saving {}/{}/{}/{}\n\t{}".format(
        self.index, self.account, self.region, self.name, self.new_config))
    datastore.store(self.index, self.region, self.account, self.name,
                    self.active, self.new_config, new_issues=self.audit_issues)
def post(self):
    """
    We decode the incoming info and pass it on to the "formatter" for
    further processing
    """
    update = self.request.body
    json_data = json.loads(update)
    logger.info("Received live update from instagram: %s", pformat(json_data))
    # format it
    formatted = dataformatter.format_instagram(json_data)
    # store it
    datastore.store(formatted)
def grabContent(start, stop):
    if stop < 0 or start < 0 or stop < start:
        print "Invalid arguments"
        return
    articles = []
    tweets = list(datastore.load('twitter_tweets', start, stop - start))
    #tweets = tweets[start:stop]
    print "Loaded " + str(len(tweets)) + " tweets from `twitter_tweets` table, " + str(start) + " -> " + str(stop - 1)
    index = start
    url_counter = 0
    for tweet in tweets:
        tweet_id = tweet['id']
        user_id = tweet['user_id']
        user_screen_name = tweet['user_screen_name']
        text = tweet['text']
        urls = tweet['urls']
        print str(index) + ": Processing tweet " + str(tweet_id)
        if urls is not None and urls.strip() != '':
            #urls = dict(json.loads(urls, encoding="utf-8"))
            try:
                urls = ast.literal_eval(urls)
            except Exception as e:
                continue
            print "Found " + str(len(urls)) + " urls"
            for url in urls:
                url_counter = url_counter + 1
                display_url = urls[url]
                print "Grabbing content from " + url + " ..."
                content = None
                try:
                    content = contentdetector.upgradeLink(url)
                except Exception as e:
                    print "Exception occurs when trying to grab content from " + url
                    traceback.print_exc()
                if content is not None and content.strip() != '':
                    articles.append({
                        "tweet_id": tweet_id,
                        "user_id": user_id,
                        "user_screen_name": user_screen_name,
                        "text": text,
                        "url": url,
                        "display_url": display_url,
                        "content": content
                    })
        index = index + 1
    print "Processed tweets " + str(start) + " -> " + str(stop - 1)
    print "Total urls processed: " + str(url_counter)
    print "Total articles grabbed: " + str(len(articles))
    datastore.store(articles, 'twitter_articles_' + str(start) + '_' + str(stop - 1), '')
def rebuild_users_level1():
    users = list(datastore.load('khiem.twitter_users_level1'))
    users = users[3:]
    user_ids = []
    for u in users:
        user_ids.append(u["id"])
    users = api.UsersLookup(user_id=user_ids)
    user_data = []
    for u in users:
        user_data.append(u.AsDict())
    print "Successfully retrieve " + str(len(users)) + " users info"
    datastore.store(user_data, 'twitter_users_level1_rebuild', '')
def run(ID):
    print "Running special test with ID:", ID
    for n in range(2, 20):
        constants.num_neurons = n
        population = init_population()
        datastore.store(population, 0, ID)
    print "Dying early."
    return
    # Unreachable in this special test: the normal evolution loop below is skipped.
    generation = 0
    while True:
        datastore.store(population, generation, ID)
        generation = generation + 1
        population = selection(population)
def build_graph_level3(start, stop):
    if (stop - start > 15):
        print "The [start, stop) range must not be greater than 15, i.e. (stop - start) <= 15"
        return
    users_l2 = list(datastore.load('khiem.twitter_users_level2'))
    users_l2 = users_l2[start:stop]
    user_ids = []
    followings = []
    users_l3 = []
    index = start
    try:
        for user in users_l2:
            print str(index) + ": Find top friends for user " + user['name']
            try:
                friends = findTopFriends(user["id"])
                for friend in friends:
                    followings.append({"user_id": user["id"], "friend_id": friend.id})
                    if friend.id not in user_ids:
                        user_ids.append(friend.id)
                        users_l3.append(friend.AsDict())
            except Exception as e:
                print "Exception occurs when find top friends of user " + user['name']
                traceback.print_exc()
            try:
                print str(index) + ": Find top followers for user " + user['name']
                followers = findTopFollowers(user['id'])
                for follower in followers:
                    followings.append({"user_id": follower.id, "friend_id": user['id']})
                    if follower.id not in user_ids:
                        user_ids.append(follower.id)
                        users_l3.append(follower.AsDict())
            except Exception as e:
                print "Exception occurs when find top followers of user " + user['name']
                traceback.print_exc()
            index = index + 1
    except Exception as e:
        traceback.print_exc()
        print str(datetime.now())
        return
    datastore.store(users_l3, "twitter_users_level3_" + str(start) + "_" + str(stop - 1), "")
    datastore.store(followings, "twitter_followings_level3_" + str(start) + "_" + str(stop - 1), "")
    print str(datetime.now()) + ": Result at level 3, segment " + str(start) + "->" + str(stop - 1) + ": " + str(len(users_l3)) + " users, " + str(len(followings)) + " followings"
def _get_tweets():
    tweets = []
    users = list(datastore.load('khiem.twitter_users'))
    for i in range(300, 501):
        try:
            print str(i) + ': Getting tweets from user ' + users[i]['name']
            statuses = api.GetUserTimeline(screen_name=users[i]['screen_name'], count=200)
            for st in statuses:
                tweets.append(st.AsDict())
            print 'Processed ' + str(len(statuses)) + ' tweets for user ' + users[i]['name']
        except Exception as e:
            print e
            print 'Exception occurs when process user ' + users[i]['name'] + '. Skip.'
            continue
    print 'Storing tweets for ' + str(i) + ' users'
    # store all tweets
    datastore.store(tweets, 'twitter_tweets_300_500', 'Tweets')
def build_graph_level2():
    # get users from level1, except the top 3 users from level0
    users_level1 = list(datastore.load('khiem.twitter_users_level1'))
    users_level1 = users_level1[3:]
    users_l2 = []
    followings = []
    user_ids = []
    # for each user find top friends
    for user in users_level1:
        print "Find top friends of user " + user["name"]
        friends = findTopFriends(user["id"])
        for friend in friends:
            followings.append({"user_id": user["id"], "friend_id": friend.id})
            if friend.id not in user_ids:
                user_ids.append(friend.id)
                users_l2.append(friend.AsDict())
    datastore.store(users_l2, "twitter_users_level2", "")
    datastore.store(followings, "twitter_followings_level2", "")
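# The Twitter snippets above assume a `datastore` module that exposes
# store(items, table_name, description) and load(table_name, offset=None,
# limit=None). The real backend is not shown in these examples; a minimal
# in-memory sketch of that assumed interface could look like this:
class InMemoryDatastore(object):
    def __init__(self):
        self._tables = {}

    def store(self, items, table_name, description):
        # Append the given dicts to a named table.
        self._tables.setdefault(table_name, []).extend(items)

    def load(self, table_name, offset=None, limit=None):
        # Return the whole table, or a slice when offset/limit are given.
        rows = self._tables.get(table_name, [])
        if offset is not None:
            rows = rows[offset:offset + limit] if limit is not None else rows[offset:]
        return rows

datastore = InMemoryDatastore()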
def save(self, datastore, ephemeral=False):
    """ Save the item """
    app.logger.debug("Saving {}/{}/{}/{}\n\t{}".format(
        self.index, self.account, self.region, self.name, self.new_config))
    self.db_item = datastore.store(
        self.index, self.region, self.account, self.name, self.active,
        self.new_config, arn=self.arn, new_issues=self.audit_issues,
        ephemeral=ephemeral)
def api_store():
    token = request.args.get('token')
    if not token:
        return "Need valid google oauth token"
    url = request.args.get('url')
    tags = request.args.getlist('tags')
    if not url:
        return "need url parameter"
    elif tldextract.extract(url).domain not in supported_services:
        return "service has to be from one of these {}".format(supported_services)
    tags.sort()
    result = datastore.store(token, url, tags)
    if result:
        return "Thanks!"
    else:
        return "Something went wrong."
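# Example request for the api_store view above (assuming it is mounted at
# /store in a Flask app; the route, host, and parameter values are
# illustrative only, not taken from the original code):
#   GET /store?token=<google-oauth-token>&url=https://example.com/track&tags=rock&tags=live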