def get_users_accounts():
    """Store a follower/following snapshot for every configured account.

    For each ``(username, credentials)`` pair in ``user_data`` a tweepy API
    client is built from the stored keys, the two collections returned by
    ``get_account_data`` are fetched, and one row is inserted into
    ``clients_accounts``.

    Returns:
        None
    """
    media_id = get_media_id()
    credential_fields = (
        'consumer_key',
        'consumer_secret',
        'access_token',
        'access_token_secret',
    )
    for username, credentials in user_data.items():
        consumer_key, consumer_secret, token, token_secret = (
            credentials[field] for field in credential_fields
        )
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(token, token_secret)
        api = tweepy.API(auth, wait_on_rate_limit=True)

        client_id = get_user_id(username)
        followers, follows = get_account_data(api)

        # One-row frame: counts plus the follower collection serialized
        # as JSON.
        snapshot = pd.DataFrame({
            'client_id': [client_id],
            'media_id': [media_id],
            'followers': [len(followers)],
            'follows': [len(follows)],
            'follower_users': [json.dumps(followers)],
            'created_on': [datetime.now()],
        })
        insert_values(snapshot, 'clients_accounts')
    return None
def unfollow_haters(
    session, username, unfollow_list, media_id, max_interactions
):
    """Unfollow accounts from ``unfollow_list`` and log every unfollow.

    The list is visited in random order inside ``smart_run(session)``. For
    each account the session unfollows it, reads its follow counts and a row
    is inserted into ``interactions``. Errors for a single account are
    logged and that account is skipped.

    Args:
        session: Session object exposing ``unfollow_users`` and
            ``get_follow_count``.
        username: Name passed to ``get_user_id`` to obtain the client id.
        unfollow_list: Usernames to unfollow.
        media_id: Value stored in the ``media_id`` column of each row.
        max_interactions: Maximum number of logged unfollows.

    Returns:
        None
    """
    client_id = get_user_id(username)
    interaction_id = get_interaction_id('unfollow')
    counter = 1
    with smart_run(session):
        shuffled_targets = sample(unfollow_list, len(unfollow_list))
        for target in shuffled_targets:
            if counter > max_interactions:
                return None
            try:
                session.unfollow_users(customList=(True, [target], "all"))
                followers_count, follows_count = session.get_follow_count(
                    target
                )
                row = pd.DataFrame({
                    'client_id': [client_id],
                    'username': [target],
                    'user_followers': [followers_count],
                    'user_follows': [follows_count],
                    'created_on': [datetime.now()],
                    'interaction_id': [interaction_id],
                    'media_id': [media_id],
                })
                insert_values(row, 'interactions')
                counter += 1
                take_a_nap()
            except Exception as err:
                # Best effort: a failing account must not stop the run.
                logger.info(err)
    return None
def remove_favs(api, username, media_id, max_interactions):
    """Remove liked tweets and log each removal in ``interactions``.

    Iterates over ``tweepy.Cursor(api.favorites)``, destroys each favorite
    and inserts one row describing the tweet and its author. Stops when
    more than ``max_interactions`` removals would be made or the cursor is
    exhausted.

    Args:
        api: tweepy API client.
        username: Name passed to ``get_user_id`` to obtain the client id.
        media_id: Value stored in the ``media_id`` column of each row.
        max_interactions: Maximum number of favorites to remove.

    Returns:
        None
    """
    client_id = get_user_id(username)
    interaction_id = get_interaction_id('unlike')
    counter = 1
    for fav in tweepy.Cursor(api.favorites).items():
        if counter > max_interactions:
            break
        api.destroy_favorite(fav.id)
        author = fav.user
        row = pd.DataFrame({
            'client_id': [client_id],
            'content_id': [fav.id],
            'content': [fav.text],
            'user_name': [author.screen_name],
            'user_followers': [author.followers_count],
            'user_follows': [author.friends_count],
            'likes_in_content': [fav.favorite_count],
            'created_on': [datetime.now()],
            'interaction_id': [interaction_id],
            'user_location': [author.location],
            'media_id': [media_id],
        })
        insert_values(row, 'interactions')
        counter += 1
        take_a_nap()
    return None
def unfollow_haters(api, username, untouchable_users, media_id,
                    max_interactions):
    """Unfollow followed accounts that are not protected and log each one.

    Iterates over ``tweepy.Cursor(api.friends)``. Accounts whose screen name
    is in ``untouchable_users`` are skipped; every other account is
    unfollowed and a row is inserted into ``interactions``. Errors for a
    single account are logged and that account is skipped.

    Args:
        api: tweepy API client.
        username: Name passed to ``get_user_id`` to obtain the client id.
        untouchable_users: Screen names that must not be unfollowed.
        media_id: Value stored in the ``media_id`` column of each row.
        max_interactions: Maximum number of logged unfollows.

    Returns:
        None
    """
    client_id = get_user_id(username)
    interaction_id = get_interaction_id('unfollow')
    counter = 1
    for friend in tweepy.Cursor(api.friends).items():
        if counter > max_interactions:
            return None
        if friend.screen_name in untouchable_users:
            continue
        try:
            api.destroy_friendship(friend.screen_name)
            row = pd.DataFrame({
                'client_id': [client_id],
                'content_id': [friend.id],
                'username': [friend.screen_name],
                'user_followers': [friend.followers_count],
                'user_follows': [friend.friends_count],
                'created_on': [datetime.now()],
                'interaction_id': [interaction_id],
                'user_location': [friend.location],
                'media_id': [media_id],
            })
            insert_values(row, 'interactions')
            counter += 1
            take_a_nap()
        except Exception as err:
            # Best effort: a failing account must not stop the run.
            logger.info(err)
    return None
def insert_predictive_stats(df):
    """Compute predicted values for ``df`` and insert them into ``pred_stats``.

    ``df`` is passed through ``get_shot_possessions`` and then
    ``get_predicted_values``; the resulting frame is inserted with an
    identity column mapping (every column maps to itself).
    """
    predictions = get_predicted_values(get_shot_possessions(df))

    # Insert in postgres, keeping the frame's column names unchanged.
    identity_cols = dict(zip(predictions.columns, predictions.columns))
    insert_values(predictions, 'pred_stats', identity_cols)
    logger.info("Predictive values inserted")
def build_standard_tables():
    """Create the standard tables and load each one from its CSV file.

    After ``create_tables`` runs, every name in ``STANDARD_TABLES`` is read
    from ``<PROJECT_DIR>/csvs/<name>.csv`` and inserted into the table of
    the same name with an identity column mapping.
    """
    create_tables(PROJECT_DIR, STANDARD_TABLES)
    for table in STANDARD_TABLES:
        csv_name = '{}.csv'.format(table)
        table_df = pd.read_csv(os.path.join(PROJECT_DIR, 'csvs', csv_name))
        identity_cols = dict(zip(table_df.columns, table_df.columns))
        insert_values(table_df, table, identity_cols)
    logger.info("Standard tables created & inserted")
def follow_users(api, username, users, media_id, max_interactions,
                 not_follow_users=None):
    """Follow followers of the given seed accounts and log each follow.

    The seed accounts in ``users`` are visited in random order; for each
    one, its followers are fetched and visited in random order. A candidate
    is followed unless its screen name is excluded, and every successful
    follow is inserted as one row into ``interactions``. Errors for a single
    candidate are logged and that candidate is skipped.

    Args:
        api: tweepy API client.
        username: Name passed to ``get_user_id`` to obtain the client id.
        users: Screen names of the seed accounts whose followers are
            candidates.
        media_id: Value stored in the ``media_id`` column of each row.
        max_interactions: Maximum number of logged follows.
        not_follow_users: Optional iterable of screen names that must never
            be followed. Defaults to no extra exclusions.

    Returns:
        None
    """
    user_id = get_user_id(username)
    int_id = get_interaction_id('follow')

    # Exclusions: the caller's list plus everyone who already follows the
    # account or is already followed by it. Building a set directly avoids
    # the shared mutable default and accepts any iterable, not only lists.
    unique_not_follow = set(not_follow_users or ())
    unique_not_follow.update(
        x.screen_name for x in tweepy.Cursor(api.followers).items()
    )
    unique_not_follow.update(
        x.screen_name for x in tweepy.Cursor(api.friends).items()
    )

    n = 1
    for u in sample(users, len(users)):
        # The cursor must be materialized so the followers can be shuffled.
        followers_list = list(
            tweepy.Cursor(api.followers, screen_name=u).items()
        )
        for f in sample(followers_list, len(followers_list)):
            if n > max_interactions:
                return None
            if f.screen_name in unique_not_follow:
                continue
            try:
                f.follow()
                f_dict = {
                    'client_id': [user_id],
                    'id': [f.id],
                    'username': [f.screen_name],
                    'user_followers': [f.followers_count],
                    'user_follows': [f.friends_count],
                    'created_on': [datetime.now()],
                    'interaction_id': [int_id],
                    'user_to_follow': [u],
                    'user_location': [f.location],
                    'media_id': [media_id],
                }
                follow_df = pd.DataFrame(f_dict)
                insert_values(follow_df, 'interactions')
                n += 1
                take_a_nap()
            except Exception as e:
                # Best effort: a failing candidate must not stop the run.
                logger.info(e)
    return None
def insert_df(df, table, stats_dict):
    """Insert only the rows of ``df`` whose ``id`` is not yet in ``table``.

    The ids already present are read with ``select id from <table>``. The
    ``id`` column of ``df`` is cast to ``int`` in place (the caller's frame
    is modified) before the comparison.

    Args:
        df: Frame with an ``id`` column.
        table: Name of the destination table.
        stats_dict: Passed through unchanged to ``insert_values``.
    """
    existing = get_df_from_query('select id from {}'.format(table))
    known_ids = [] if existing.empty else existing['id'].tolist()

    df['id'] = df['id'].astype(int)
    is_new = ~df['id'].isin(known_ids)
    pending = df[is_new]

    if pending.empty:
        logger.info("All values in {} are already inserted".format(table))
        return
    insert_values(pending, table, stats_dict)
def get_client_values():
    """Interactively collect client data and insert it into ``clients``.

    ``insert_data`` is called to gather the values, which are echoed back to
    the user for confirmation. The confirmation prompt is repeated until the
    answer is exactly ``y`` or ``n``. On ``n`` the data entry starts over;
    on ``y`` a ``created_on`` timestamp is added and the data is inserted.

    Returns:
        None
    """
    # Loop instead of recursing so repeated rejections cannot exhaust the
    # call stack.
    while True:
        data_dict = insert_data()
        print("\nThese are the values inserted")
        for k, v in data_dict.items():
            # Each value is indexable; only its first element is shown.
            print('{}:'.format(k), v[0])

        values_ok = input("Are these values ok? y/n ")
        # Keep asking until the answer is valid; previously a second invalid
        # answer was silently treated as "n".
        while values_ok not in ('y', 'n'):
            values_ok = input("Are these values ok? y/n ")

        if values_ok == 'y':
            break

    data_dict['created_on'] = datetime.now()
    data_df = pd.DataFrame(data_dict)
    insert_values(data_df, 'clients')
    print("Data inserted ok :)")
def like_tweets(api, username, kws, likes_per_kw, media_id, max_interactions):
    """Like tweets found by keyword search and log each like.

    Keywords are visited in random order; for each one up to
    ``likes_per_kw`` search results are favorited and a row is inserted
    into ``interactions``. ``tweepy.TweepError`` for a single tweet is
    logged and that tweet is skipped.

    Args:
        api: tweepy API client.
        username: Name passed to ``get_user_id`` to obtain the client id.
        kws: Search keywords.
        likes_per_kw: Maximum number of search results taken per keyword.
        media_id: Value stored in the ``media_id`` column of each row.
        max_interactions: Maximum number of logged likes overall.

    Returns:
        None
    """
    client_id = get_user_id(username)
    interaction_id = get_interaction_id('like')
    counter = 1
    for keyword in sample(kws, len(kws)):
        results = tweepy.Cursor(api.search, keyword).items(likes_per_kw)
        for status in results:
            if counter > max_interactions:
                return None
            try:
                status.favorite()
                row = pd.DataFrame({
                    'client_id': [client_id],
                    'content_id': [status.id],
                    'content_text': [status.text],
                    'username': [status.user.screen_name],
                    'user_followers': [status.user.followers_count],
                    'user_follows': [status.user.friends_count],
                    'likes_in_content': [status.favorite_count],
                    'created_on': [datetime.now()],
                    'interaction_id': [interaction_id],
                    'kw_searched': [keyword],
                    'user_location': [status.user.location],
                    'media_id': [media_id],
                })
                insert_values(row, 'interactions')
                counter += 1
                take_a_nap()
            except tweepy.TweepError as err:
                logger.info(err)
            except StopIteration:
                # Move on to the next keyword.
                break
    return None
def get_users_accounts():
    """Store a follower/following count snapshot for every client.

    An ``InstaPy`` session is created with the ``DINO_USER`` /
    ``DINO_PASS`` credentials. For every row returned by
    ``get_all_clients`` the follow counts of its ``ig_username`` are read
    and one row is inserted into ``clients_accounts``.

    Returns:
        None
    """
    media_id = get_media_id()
    session = InstaPy(
        username=DINO_USER,
        password=DINO_PASS,
        headless_browser=True,
    )
    clients = get_all_clients()
    for _, client in clients.iterrows():
        username = client['ig_username']
        client_id = client['id']
        logger.info("Getting data for {}".format(username))
        followers_count, follows_count = session.get_follow_count(username)

        # NOTE: the full follower list (session.grab_followers with
        # amount="full", serialized into a 'follower_users' column) is
        # currently not collected; only the counts are stored.
        snapshot = pd.DataFrame({
            'client_id': [client_id],
            'media_id': [media_id],
            'followers': [followers_count],
            'follows': [follows_count],
            'created_on': [datetime.now()],
        })
        insert_values(snapshot, 'clients_accounts')
    return None
def follow_users(
    session, username, users, media_id, max_interactions,
    min_followers, min_following, max_followers, follow_for_like
):
    """Follow followers of the given seed accounts and log each follow.

    The seed accounts in ``users`` are visited in random order. For each
    one a fresh ``InstaPy`` session is opened for ``username``, up to
    ``5 * max_interactions`` followers are grabbed and visited in random
    order. Candidates whose follow counts fall outside the requested bounds
    are skipped; the rest are followed and one row per candidate is
    inserted into ``interactions``.

    Args:
        session: Session object used only to read follow counts
            (``get_follow_count``).
        username: Account performing the follows; also the key into
            ``user_data`` for its password.
        users: Usernames of the seed accounts.
        media_id: Value stored in the ``media_id`` column of each row.
        max_interactions: Maximum number of logged follows overall.
        min_followers: Minimum follower count a candidate must have.
        min_following: Minimum following count a candidate must have.
        max_followers: Maximum follower count a candidate may have.
        follow_for_like: When truthy, follow likers of the candidate's
            photos (``follow_likers``) instead of the candidate itself
            (``follow_by_list``).

    Returns:
        None
    """
    user_id = get_user_id(username)
    int_id = get_interaction_id('follow')
    n = 1
    for u in sample(users, len(users)):
        logger.info("Following {} followers".format(u))
        new_session = InstaPy(
            username=username,
            password=user_data[username]['key'],
            headless_browser=True
        )
        with smart_run(new_session):
            followers = new_session.grab_followers(
                username=u,
                amount=5 * max_interactions,
                live_match=False,
                store_locally=False
            )
            # The session's own bounds are set to zero minimums; the
            # requested bounds are applied by the explicit check below.
            new_session.set_relationship_bounds(
                enabled=True,
                delimit_by_numbers=True,
                min_followers=0,
                min_following=0
            )
            new_session.set_do_follow(enabled=True, times=1)
            failed_number = 0
            for f in sample(followers, len(followers)):
                if n > max_interactions:
                    return None
                if failed_number >= 10:
                    # Ten consecutive failures: give up on this seed
                    # account. The counter can no longer reset, so looping
                    # over the remaining candidates would do nothing.
                    break
                try:
                    user_followers, user_follows = session.get_follow_count(f)
                    # Fixed: followers are compared against min_followers
                    # (previously min_following was used for both checks).
                    within_bounds = (
                        user_followers >= min_followers
                        and user_follows >= min_following
                        and user_followers <= max_followers
                    )
                    if not within_bounds:
                        continue
                    if follow_for_like:
                        new_session.follow_likers(
                            [f],
                            photos_grab_amount=3,
                            follow_likers_per_photo=25,
                            randomize=True
                        )
                    else:
                        new_session.follow_by_list(followlist=[f])
                    f_dict = {
                        'client_id': [user_id],
                        'username': [f],
                        'user_followers': [user_followers],
                        'user_follows': [user_follows],
                        'created_on': [datetime.now()],
                        'interaction_id': [int_id],
                        'user_to_follow': [u],
                        'media_id': [media_id]
                    }
                    follow_df = pd.DataFrame(f_dict)
                    insert_values(follow_df, 'interactions')
                    n += 1
                    take_a_nap()
                    failed_number = 0
                except Exception as e:
                    # Best effort: count the failure and try the next one.
                    failed_number += 1
                    logger.info(e)
    return None
def insert_game_info(sd, ed, leagues, seasons):
    """Fetch game info for the given range and insert it into ``games_info``.

    Args:
        sd: Start of the range, passed to ``get_games_id``.
        ed: End of the range, passed to ``get_games_id``.
        leagues: Passed to ``get_games_id``.
        seasons: Passed to ``get_games_id``.

    Returns:
        The frame returned by ``get_games_id``.
    """
    games = get_games_id(sd, ed, leagues, seasons)
    insert_values(games, 'games_info', GAME_INFO_COLS)
    message = "Inserted game info for games between {} - {}".format(sd, ed)
    logger.info(message)
    return games
def insert_players_stats(game_id):
    """Fetch the players' stats of ``game_id`` and insert them.

    The frame returned by ``get_players_stats`` is written to the
    ``players_stats`` table using ``PLAYERS_STATS_COLS``.

    Returns:
        None
    """
    insert_values(
        get_players_stats(game_id),
        'players_stats',
        PLAYERS_STATS_COLS,
    )
    logger.info("Inserted players stats for game {}".format(game_id))
def insert_teams_stats(game_id):
    """Fetch the teams' stats of ``game_id`` and insert them.

    The frame returned by ``get_teams_stats`` is written to the
    ``teams_stats`` table using ``TEAMS_STATS_COLS``.

    Returns:
        None
    """
    insert_values(
        get_teams_stats(game_id),
        'teams_stats',
        TEAMS_STATS_COLS,
    )
    logger.info("Inserted teams stats for game {}".format(game_id))
def insert_game_events(game_id, teams):
    """Fetch, process and insert the events of ``game_id``.

    The output of ``get_game_events`` is passed through ``process_game``
    and written to the ``game_events`` table using ``GAME_EVENTS_COLS``.

    Returns:
        None
    """
    raw_events = get_game_events(game_id, teams)
    processed_events = process_game(raw_events)
    insert_values(processed_events, 'game_events', GAME_EVENTS_COLS)
    logger.info("Inserted events for game {}".format(game_id))