def troll_bot_analyzer(user, api):
    """Score a user against five profiles and name the highest-scoring one.

    Args:
        user: identifier handed to ``data_user`` and echoed back as
            ``"user_id"`` in the result.
        api: client object passed through to ``data_user`` and
            ``percentage_stalker``.

    Returns:
        ``None`` (after logging) when ``data_user`` raises
        ``tweepy.TweepError``; ``False`` (after logging) when the fetched
        data holds no tweets; otherwise a dict with the five scores
        (``bot``, ``drama_queen``, ``stalker``, ``hater``, ``spammer``),
        the ``famous`` / ``non_famous`` flags, the ``stalked`` account and
        the ``final`` label.
    """
    try:
        user_data = data_user(user, api)
    except tweepy.TweepError:
        logging.error("This user is protected or does not exist. His information cannot be accessed")
        return None

    tweet_count = len(user_data["tweets"])
    if tweet_count == 0:
        logging.error("There is not enough information to classify this user")
        return False

    # Per-tweet ratios derived from the counters stored in user_data.
    hashtags_per_tweet = float(user_data["number_hashtags"]) / tweet_count
    mentions_per_tweet = float(user_data["number_mentions"]) / tweet_count
    percentage_tweet_with_mention = float(user_data["tweet_with_mentions"]) / tweet_count
    percentage_tweet_with_hashtag = float(user_data["tweets_with_hashtags"]) / tweet_count

    # Raw features; the helpers are called in the same order as before.
    signs_per_char, capitals_per_char, activity, percentage_tweet_with_omg = drama_queen(user_data)
    periodicity, answer = periodicity_answer(user_data)
    diversity_hashtags = tweet_iteration_hashtags(user_data)
    diversity_tweets = tweet_iteration_stemming(user_data)
    urls_percentage = tweet_iteration_urls(user_data)
    num_stalker, who_stalker = stalker(user_data)

    # Profile scores built from the raw features.
    per_drama_queen = percentage_drama_queen(
        activity,
        percentage_tweet_with_omg,
        capitals_per_char,
        signs_per_char,
        percentage_tweet_with_hashtag,
        percentage_tweet_with_mention,
        mentions_per_tweet,
        hashtags_per_tweet,
    )
    per_bot = percentage_bot(periodicity, answer, diversity_tweets)
    per_stalker, famous, non_famous = percentage_stalker(
        num_stalker,
        who_stalker,
        mentions_per_tweet,
        percentage_tweet_with_mention,
        api,
    )
    if per_stalker == 0:
        # A zero score is replaced by the raw stalker figure.
        per_stalker = num_stalker
    per_spammer = percentage_spammer(diversity_tweets, diversity_hashtags, urls_percentage)
    per_hater = (1 - sentiment(user_data)) * 100

    # max() keeps the first maximal entry, so ties resolve in this order.
    ranked = [
        ("bot", per_bot),
        ("drama_queen", per_drama_queen),
        ("stalker", per_stalker),
        ("hater", per_hater),
        ("spammer", per_spammer),
    ]
    final = max(ranked, key=lambda entry: entry[1])[0]

    return {
        "user_id": user,
        "bot": per_bot,
        "drama_queen": per_drama_queen,
        "stalker": per_stalker,
        "hater": per_hater,
        "spammer": per_spammer,
        "famous": famous,
        "non_famous": non_famous,
        "stalked": who_stalker,
        "final": final,
    }
def run(self):
    """Claim uncollected screen names one by one and store their user data.

    Loops while ``screen_names.count()`` exceeds ``users.count()``. Each
    pass flags one ``{"collected": False}`` document as collected, fetches
    its data with ``data_user`` and inserts it into ``users``, then marks
    the screen name as completed.

    A ``TweepError`` is logged as an API-limit hit and followed by
    ``sleep(60 * 15)``; any other exception is logged and the loop moves
    on. The loop ends early when no uncollected screen name is left to
    claim.
    """
    while screen_names.count() > users.count():
        s_name = screen_names.find_one_and_update(
            {"collected": False}, {"$set": {"collected": True}})
        if s_name is None:
            # find_one_and_update yields None when nothing matches the
            # filter; subscripting it would raise TypeError outside the
            # try block, so stop this worker cleanly instead.
            break

        # "_id" is the key used for every lookup on this document.
        user_id = s_name["_id"]
        logging.info("User {} in thread {}.".format(user_id, self.getName()))
        try:
            users.insert_one(data_user(user_id, self.api))
            screen_names.update_one({"_id": user_id},
                                    {"$set": {"completed": True}})
        except TweepError as e:
            logging.error(
                "Ups, Arrived to API limit in thread {}. Exception: {}".format(
                    self.getName(), e))
            sleep(60 * 15)
        except Exception as e:
            # Report the same "_id" as above: the previous s_name["user"]
            # lookup could raise KeyError from inside this handler.
            logging.error(
                "Could not store user {}, the Exceception was {}.".format(
                    user_id, e))
def percentage_stalker(num_stalker, who_stalker, mentions_per_tweet, percentage_tweet_with_mention, api):
    """Turn the raw stalker figure into a score plus famous/non-famous flags.

    Args:
        num_stalker: figure compared against the 45 and 75 cut-offs.
        who_stalker: account identifier; converted with ``str`` and looked
            up through ``data_user`` once the 45 cut-off is exceeded.
        mentions_per_tweet: subtracted (weight 1 x 100) in the blended score.
        percentage_tweet_with_mention: added (weight 3 x 100) in the
            blended score.
        api: client object passed through to ``data_user``.

    Returns:
        A ``(score, famous, non_famous)`` tuple:
        ``(0, 0, 0)`` when ``num_stalker`` does not exceed 45;
        ``(num_stalker, 1, 0)`` when the looked-up account is verified;
        ``(num_stalker, 0, 1)`` when it is unverified and ``num_stalker``
        exceeds 75; otherwise the blended score with both flags at 0.
    """
    # At or below the first cut-off no lookup is made at all.
    if not (num_stalker > 45):
        return 0, 0, 0

    target = data_user(str(who_stalker), api)
    if target["user_json"]["verified"]:
        return num_stalker, 1, 0

    if num_stalker > 75:
        return num_stalker, 0, 1

    # Between the two cut-offs the mention statistics are blended in.
    blended = (6 * num_stalker
               + 3 * 100 * percentage_tweet_with_mention
               - 1 * 100 * mentions_per_tweet)
    return blended / 8, 0, 0
def percentage_stalker(num_stalker, who_stalker, mentions_per_tweet, percentage_tweet_with_mention, api):
    """Derive the stalker score and the famous/non-famous flags.

    Args:
        num_stalker: figure checked against the 45 and 75 thresholds.
        who_stalker: account identifier; passed as ``str`` to ``data_user``
            only when ``num_stalker`` is above 45.
        mentions_per_tweet: enters the blended score with weight -1 x 100.
        percentage_tweet_with_mention: enters the blended score with
            weight 3 x 100.
        api: client object forwarded to ``data_user``.

    Returns:
        Tuple ``(score, famous, non_famous)``. The score is 0 at or below
        45, ``num_stalker`` itself for a verified account or for a value
        above 75, and the blended value divided by 8 otherwise. ``famous``
        is 1 only for a verified account; ``non_famous`` is 1 only for an
        unverified account with ``num_stalker`` above 75.
    """
    # Nothing is fetched unless the first threshold is exceeded.
    if not (num_stalker > 45):
        return 0, 0, 0

    stalked_profile = data_user(str(who_stalker), api)
    is_verified = stalked_profile["user_json"]["verified"]
    if is_verified:
        return num_stalker, 1, 0

    if num_stalker > 75:
        return num_stalker, 0, 1

    # Middle band: combine the raw figure with the mention statistics.
    weighted_sum = (6 * num_stalker
                    + 3 * 100 * percentage_tweet_with_mention
                    - 1 * 100 * mentions_per_tweet)
    return weighted_sum / 8, 0, 0
def run(self):
    """Claim uncollected screen names one by one and store their user data.

    Loops while ``screen_names.count()`` exceeds ``users.count()``. Each
    pass flags one ``{"collected": False}`` document as collected, fetches
    its data with ``data_user`` and inserts it into ``users``, then marks
    the screen name as completed.

    A ``TweepError`` is logged as an API-limit hit and followed by
    ``sleep(60 * 15)``; any other exception is logged and the loop moves
    on. The loop ends early when no uncollected screen name is left to
    claim.
    """
    while screen_names.count() > users.count():
        s_name = screen_names.find_one_and_update({"collected": False}, {"$set": {"collected": True}})
        if s_name is None:
            # find_one_and_update yields None when nothing matches the
            # filter; subscripting it would raise TypeError outside the
            # try block, so stop this worker cleanly instead.
            break

        # "_id" is the key used for every lookup on this document.
        user_id = s_name["_id"]
        logging.info("User {} in thread {}.".format(user_id, self.getName()))
        try:
            users.insert_one(data_user(user_id, self.api))
            screen_names.update_one({"_id": user_id}, {"$set": {"completed": True}})
        except TweepError as e:
            logging.error("Ups, Arrived to API limit in thread {}. Exception: {}".format(self.getName(), e))
            sleep(60 * 15)
        except Exception as e:
            # Report the same "_id" as above: the previous s_name["user"]
            # lookup could raise KeyError from inside this handler.
            logging.error("Could not store user {}, the Exceception was {}.".format(user_id, e))
def troll_bot_analyzer(user, api):
    """Compute five profile scores for a user and pick the dominant label.

    Args:
        user: identifier given to ``data_user``; returned unchanged under
            the ``"user_id"`` key.
        api: client object forwarded to ``data_user`` and
            ``percentage_stalker``.

    Returns:
        ``None`` when ``data_user`` raises ``tweepy.TweepError`` (an error
        is logged); ``False`` when the user data contains no tweets (an
        error is logged); otherwise a dict holding the ``bot``,
        ``drama_queen``, ``stalker``, ``hater`` and ``spammer`` scores, the
        ``famous`` / ``non_famous`` flags, the ``stalked`` account and the
        ``final`` label (the first label with the highest score).
    """
    try:
        user_data = data_user(user, api)
    except tweepy.TweepError:
        logging.error(
            "This user is protected or does not exist. His information cannot be accessed"
        )
        return None

    total_tweets = len(user_data["tweets"])
    if total_tweets == 0:
        logging.error("There is not enough information to classify this user")
        return False

    # Ratios over the number of tweets available for this user.
    hashtags_per_tweet = float(user_data["number_hashtags"]) / total_tweets
    mentions_per_tweet = float(user_data["number_mentions"]) / total_tweets
    percentage_tweet_with_mention = (
        float(user_data["tweet_with_mentions"]) / total_tweets)
    percentage_tweet_with_hashtag = (
        float(user_data["tweets_with_hashtags"]) / total_tweets)

    # Feature extraction; helper call order is kept as in the original.
    (signs_per_char, capitals_per_char, activity,
     percentage_tweet_with_omg) = drama_queen(user_data)
    periodicity, answer = periodicity_answer(user_data)
    diversity_hashtags = tweet_iteration_hashtags(user_data)
    diversity_tweets = tweet_iteration_stemming(user_data)
    urls_percentage = tweet_iteration_urls(user_data)
    num_stalker, who_stalker = stalker(user_data)

    # Scores per profile.
    per_drama_queen = percentage_drama_queen(
        activity, percentage_tweet_with_omg, capitals_per_char,
        signs_per_char, percentage_tweet_with_hashtag,
        percentage_tweet_with_mention, mentions_per_tweet,
        hashtags_per_tweet)
    per_bot = percentage_bot(periodicity, answer, diversity_tweets)
    per_stalker, famous, non_famous = percentage_stalker(
        num_stalker, who_stalker, mentions_per_tweet,
        percentage_tweet_with_mention, api)
    if per_stalker == 0:
        # Fall back to the raw stalker figure when the score came back 0.
        per_stalker = num_stalker
    per_spammer = percentage_spammer(
        diversity_tweets, diversity_hashtags, urls_percentage)
    per_hater = (1 - sentiment(user_data)) * 100

    # Pair each label with its score; max() returns the first maximum,
    # which fixes the tie-breaking order to the order listed here.
    labelled_scores = [
        ("bot", per_bot),
        ("drama_queen", per_drama_queen),
        ("stalker", per_stalker),
        ("hater", per_hater),
        ("spammer", per_spammer),
    ]
    final, _ = max(labelled_scores, key=lambda item: item[1])

    return {
        "user_id": user,
        "bot": per_bot,
        "drama_queen": per_drama_queen,
        "stalker": per_stalker,
        "hater": per_hater,
        "spammer": per_spammer,
        "famous": famous,
        "non_famous": non_famous,
        "stalked": who_stalker,
        "final": final,
    }