def transform_users_data():
    """Transform raw Reddit user .txt dumps into a user-stats CSV.

    Loads the raw files, runs the transform/filter pipeline, writes the
    result to the transformed directory, marks the source files as
    processed, and flushes accumulated errors to the error log.
    Returns early (no-op) when there is nothing to load.
    """
    raw = load_dataframes(USERS_RAW_DIR, "reddit", ".txt", USERS_HEADERS)
    if raw is None:
        return
    # Transform then filter in one composed expression.
    stats = filter_users_stats(transform_data(raw))
    df_to_csv(stats, USERS_TRANSFORMED_DIR, "reddit_user_stats")
    # Tag the consumed raw files so they are skipped on the next run.
    prefix_files(USERS_RAW_DIR, "transformed")
    log_errors(ERROR_LOG_PATH, "reddit.users_transform", ERRORS)
def load_mentions_data():
    """Load user-mention CSVs into the twitter.user_mentions table.

    Best-effort: a SQLAlchemy failure is swallowed (errors are assumed
    to be collected in ERRORS) and the source files are only tagged as
    loaded on success. Errors are always flushed to the log at the end.
    """
    mentions = load_dataframes(ENTITIES_CSV_DIR, "user_mentions", ".csv")
    if mentions is None:
        return
    load_ok = True
    try:
        sqlalch_load(mentions, "twitter", "user_mentions", ERRORS)
    except SQLAlchError:
        load_ok = False
    if load_ok:
        # Only mark files as consumed when the DB load succeeded.
        add_prefix_to_files(ENTITIES_CSV_DIR, "user_mentions", "loaded")
    log_errors(ERROR_LOG_PATH, "twitter.mentions_load", ERRORS)
def load_subs_data():
    """Load transformed Reddit subreddit CSVs into reddit.sub_counts.

    Best-effort: a SQLAlchemy failure is swallowed (errors are assumed
    to be collected in ERRORS) and the source files are only tagged as
    loaded on success. Errors are always flushed to the log at the end.
    """
    subs = load_dataframes(SUBS_TRANSFORMED_DIR, "reddit", ".csv")
    if subs is None:
        return
    load_ok = True
    try:
        sqlalch_load(subs, "reddit", "sub_counts", ERRORS)
    except SQLAlchError:
        load_ok = False
    if load_ok:
        # Only mark files as consumed when the DB load succeeded.
        prefix_files(SUBS_TRANSFORMED_DIR, "loaded")
    log_errors(ERROR_LOG_PATH, "reddit.subs_load", ERRORS)
def load_tweets_data():
    """Load tweet CSVs into the twitter.tweets table.

    Best-effort: a SQLAlchemy failure is swallowed (errors are assumed
    to be collected in ERRORS) and the source files are only tagged as
    loaded on success. Errors are always flushed to the log at the end.
    """
    tweets = load_dataframes(TWEET_CSV_DIR, "tweets", ".csv")
    if tweets is None:
        return
    load_ok = True
    try:
        sqlalch_load(tweets, "twitter", "tweets", ERRORS)
    except SQLAlchError:
        load_ok = False
    if load_ok:
        # Only mark files as consumed when the DB load succeeded.
        add_prefix_to_files(TWEET_CSV_DIR, "tweets", "loaded")
    log_errors(ERROR_LOG_PATH, "twitter.tweets_load", ERRORS)
def load_g1_data():
    """Load transformed G1 news CSVs into the noticias.noticias table.

    Best-effort: a SQLAlchemy failure is swallowed (errors are assumed
    to be collected in ERRORS) and the source files are only tagged as
    loaded on success. Errors are always flushed to the log at the end.
    """
    news = load_dataframes(TRANSFORMED_DIR, "g1", ".csv")
    if news is None:
        return
    load_ok = True
    try:
        sqlalch_load(news, "noticias", "noticias", ERRORS)
    except SQLAlchError:
        load_ok = False
    if load_ok:
        # Only mark files as consumed when the DB load succeeded.
        prefix_files(TRANSFORMED_DIR, "loaded")
    log_errors(ERROR_LOG_PATH, "g1.load", ERRORS)
def load_users_data():
    """Load user CSVs into the twitter.users table.

    Best-effort: a SQLAlchemy failure is swallowed (errors are assumed
    to be collected in ERRORS) and the source files are only tagged as
    loaded on success. Errors are always flushed to the log at the end.
    """
    users = load_dataframes(USER_CSV_DIR, "users", ".csv")
    if users is None:
        return
    load_ok = True
    try:
        sqlalch_load(users, "twitter", "users", ERRORS)
    except SQLAlchError:
        load_ok = False
    if load_ok:
        # Only mark files as consumed when the DB load succeeded.
        prefix_files(USER_CSV_DIR, "loaded")
    log_errors(ERROR_LOG_PATH, "twitter.users_load", ERRORS)