Exemplo n.º 1
0
def transform_users_data():
    """Run the reddit users transform step.

    Asks ``load_dataframes`` for the "reddit" ``.txt`` data under
    ``USERS_RAW_DIR`` (with ``USERS_HEADERS``). When it hands back ``None``
    the function returns immediately and nothing else happens -- in
    particular ``log_errors`` is not called.

    Otherwise the frame goes through ``transform_data`` and then
    ``filter_users_stats``, the result is written via ``df_to_csv`` as
    "reddit_user_stats" under ``USERS_TRANSFORMED_DIR``, ``prefix_files`` is
    invoked on the raw directory with "transformed", and ``ERRORS`` is
    handed to ``log_errors`` under the "reddit.users_transform" label.
    """
    raw_users = load_dataframes(USERS_RAW_DIR, "reddit", ".txt", USERS_HEADERS)
    if raw_users is not None:
        user_stats = filter_users_stats(transform_data(raw_users))

        df_to_csv(user_stats, USERS_TRANSFORMED_DIR, "reddit_user_stats")
        # presumably marks the raw inputs as processed so they are not
        # picked up again -- confirm against prefix_files
        prefix_files(USERS_RAW_DIR, "transformed")
        log_errors(ERROR_LOG_PATH, "reddit.users_transform", ERRORS)
Exemplo n.º 2
0
def load_mentions_data():
    """Run the twitter user-mentions load step.

    Asks ``load_dataframes`` for the "user_mentions" ``.csv`` data under
    ``ENTITIES_CSV_DIR``; a ``None`` result ends the function immediately
    (``log_errors`` is not called in that case).

    The frame is passed to ``sqlalch_load`` together with ``ERRORS``. A
    ``SQLAlchError`` is swallowed here; only when the load did not raise is
    ``add_prefix_to_files`` called with "loaded". ``log_errors`` runs
    afterwards in both cases, under the "twitter.mentions_load" label.
    """
    mentions = load_dataframes(ENTITIES_CSV_DIR, "user_mentions", ".csv")
    if mentions is None:
        return

    load_succeeded = True
    try:
        # "twitter" / "user_mentions" are presumably the target schema and
        # table -- confirm against sqlalch_load
        sqlalch_load(mentions, "twitter", "user_mentions", ERRORS)
    except SQLAlchError:
        load_succeeded = False

    # Kept outside the try so errors raised while renaming are not caught.
    if load_succeeded:
        add_prefix_to_files(ENTITIES_CSV_DIR, "user_mentions", "loaded")

    log_errors(ERROR_LOG_PATH, "twitter.mentions_load", ERRORS)
Exemplo n.º 3
0
def load_subs_data():
    """Run the reddit subs load step.

    Asks ``load_dataframes`` for the "reddit" ``.csv`` data under
    ``SUBS_TRANSFORMED_DIR``; a ``None`` result ends the function
    immediately (``log_errors`` is not called in that case).

    The frame is passed to ``sqlalch_load`` together with ``ERRORS``. A
    ``SQLAlchError`` is swallowed here; only when the load did not raise is
    ``prefix_files`` called on the directory with "loaded". ``log_errors``
    runs afterwards in both cases, under the "reddit.subs_load" label.
    """
    sub_counts = load_dataframes(SUBS_TRANSFORMED_DIR, "reddit", ".csv")
    if sub_counts is None:
        return

    load_succeeded = True
    try:
        # "reddit" / "sub_counts" are presumably the target schema and
        # table -- confirm against sqlalch_load
        sqlalch_load(sub_counts, "reddit", "sub_counts", ERRORS)
    except SQLAlchError:
        load_succeeded = False

    # Kept outside the try so errors raised while renaming are not caught.
    if load_succeeded:
        prefix_files(SUBS_TRANSFORMED_DIR, "loaded")

    log_errors(ERROR_LOG_PATH, "reddit.subs_load", ERRORS)
Exemplo n.º 4
0
def load_tweets_data():
    """Run the twitter tweets load step.

    Asks ``load_dataframes`` for the "tweets" ``.csv`` data under
    ``TWEET_CSV_DIR``; a ``None`` result ends the function immediately
    (``log_errors`` is not called in that case).

    The frame is passed to ``sqlalch_load`` together with ``ERRORS``. A
    ``SQLAlchError`` is swallowed here; only when the load did not raise is
    ``add_prefix_to_files`` called with "loaded". ``log_errors`` runs
    afterwards in both cases, under the "twitter.tweets_load" label.
    """
    tweets = load_dataframes(TWEET_CSV_DIR, "tweets", ".csv")
    if tweets is None:
        return

    load_succeeded = True
    try:
        # "twitter" / "tweets" are presumably the target schema and table
        # -- confirm against sqlalch_load
        sqlalch_load(tweets, "twitter", "tweets", ERRORS)
    except SQLAlchError:
        load_succeeded = False

    # Kept outside the try so errors raised while renaming are not caught.
    if load_succeeded:
        add_prefix_to_files(TWEET_CSV_DIR, "tweets", "loaded")

    log_errors(ERROR_LOG_PATH, "twitter.tweets_load", ERRORS)
Exemplo n.º 5
0
def load_g1_data():
    """Run the g1 load step.

    Asks ``load_dataframes`` for the "g1" ``.csv`` data under
    ``TRANSFORMED_DIR``; a ``None`` result ends the function immediately
    (``log_errors`` is not called in that case).

    The frame is passed to ``sqlalch_load`` together with ``ERRORS``. A
    ``SQLAlchError`` is swallowed here; only when the load did not raise is
    ``prefix_files`` called on the directory with "loaded". ``log_errors``
    runs afterwards in both cases, under the "g1.load" label.
    """
    news = load_dataframes(TRANSFORMED_DIR, "g1", ".csv")
    if news is None:
        return

    load_succeeded = True
    try:
        # "noticias" / "noticias" are presumably the target schema and
        # table -- confirm against sqlalch_load
        sqlalch_load(news, "noticias", "noticias", ERRORS)
    except SQLAlchError:
        load_succeeded = False

    # Kept outside the try so errors raised while renaming are not caught.
    if load_succeeded:
        prefix_files(TRANSFORMED_DIR, "loaded")

    log_errors(ERROR_LOG_PATH, "g1.load", ERRORS)
Exemplo n.º 6
0
def load_users_data():
    """Run the twitter users load step.

    Asks ``load_dataframes`` for the "users" ``.csv`` data under
    ``USER_CSV_DIR``; a ``None`` result ends the function immediately
    (``log_errors`` is not called in that case).

    The frame is passed to ``sqlalch_load`` together with ``ERRORS``. A
    ``SQLAlchError`` is swallowed here; only when the load did not raise is
    ``prefix_files`` called on the directory with "loaded". ``log_errors``
    runs afterwards in both cases, under the "twitter.users_load" label.
    """
    users = load_dataframes(USER_CSV_DIR, "users", ".csv")
    if users is None:
        return

    load_succeeded = True
    try:
        # "twitter" / "users" are presumably the target schema and table
        # -- confirm against sqlalch_load
        sqlalch_load(users, "twitter", "users", ERRORS)
    except SQLAlchError:
        load_succeeded = False

    # Kept outside the try so errors raised while renaming are not caught.
    if load_succeeded:
        prefix_files(USER_CSV_DIR, "loaded")

    log_errors(ERROR_LOG_PATH, "twitter.users_load", ERRORS)