Пример #1
0
def get_users(engine=None):
    """Load the distinct (user_id, username) pairs stored in ``tweets``.

    Args:
        engine: Connection/engine handed to ``pd.read_sql``. Any falsy value
            (including the default ``None``) is replaced with the result of
            ``db.get_db_engine()``.

    Returns:
        Whatever ``pd.read_sql`` yields for the query: one row per distinct
        ``user_id``/``username`` combination.
    """
    connection = engine or db.get_db_engine()
    query = 'SELECT DISTINCT user_id, username FROM tweets;'
    users_df = pd.read_sql(query, con=connection)
    my_logger.info(f"Found {len(users_df)} distinct users")
    return users_df
Пример #2
0
def main():
    """Download tweets for every query, one date window at a time.

    For each ``(name, search)`` pair from ``get_queries()``, the rows of
    ``get_date_gaps()`` whose ``name`` matches are selected. Every
    ``start``/``end`` span is split by ``utils.date_range(..., step=1)`` and
    ``run_search`` is called once per resulting ``(d1, d2)`` window, with up
    to ``max_attempts`` tries when one of the known errors is raised. Queries
    without matching gap rows are skipped. Once a query's windows are done,
    the total tweet count stored for that name is logged.

    Errors other than the ones listed in the ``except`` clause propagate to
    the caller unchanged.
    """
    max_attempts = 15

    c = twitter.make_config()
    queries = get_queries()
    gaps = get_date_gaps()
    engine = db.get_db_engine()
    for name, q in queries:
        dates = gaps[gaps['name'] == name]
        if len(dates) == 0:
            continue
        c.Search = q
        for start_date, end_date in zip(dates['start'], dates['end']):
            for d1, d2 in utils.date_range(start_date, end_date, step=1):
                c.Since = d1
                c.Until = d2

                # Running the search: retry immediately on a known error and
                # stop at the first attempt that completes.
                for attempt in range(1, max_attempts + 1):
                    try:
                        my_logger.info(f'{name}:{d1}:Attempt {attempt}')
                        run_search(c, name, engine, d1)
                        break
                    except (TimeoutError, ClientError, TweetError,
                            async_TimeoutError) as e:
                        msg = f'{name}:{d1}:{e}'
                        my_logger.error(msg)
                else:
                    # Every attempt failed. Record that this window was
                    # abandoned so the gap is visible in the logs instead of
                    # being skipped silently.
                    my_logger.error(
                        f'{name}:{d1}:Giving up after {max_attempts} '
                        f'failed attempts')
                # Pause once per date window, whether or not it succeeded.
                time.sleep(2)

        # NOTE(review): `name` is interpolated directly into the WHERE
        # clause. This is only safe while query names come from trusted
        # configuration -- confirm against get_queries().
        n_tweets_total = db.count_tweets(where=f"name = '{name}'")
        my_logger.info(
            f'TOTAL OF {n_tweets_total} TWEETS DOWNLOADED FOR {name}')
Пример #3
0
def create_users_table(engine=None, drop=False):
    """Create the ``users`` table, optionally dropping an existing one first.

    Args:
        engine: Object whose ``execute`` method runs the SQL statements. Any
            falsy value (including the default ``None``) is replaced with the
            result of ``db.get_db_engine()``.
        drop: When truthy, ``DROP TABLE IF EXISTS users;`` is executed before
            the ``CREATE TABLE`` statement.
    """
    target = engine or db.get_db_engine()
    create_stmt = '''
        CREATE TABLE users(
            id INT PRIMARY KEY,
            username VARCHAR(20) UNIQUE,
            followers LONGTEXT,
            following LONGTEXT
        );
        '''
    # Build the ordered list of statements, then run them one by one.
    statements = [create_stmt]
    if drop:
        statements.insert(0, '''DROP TABLE IF EXISTS users;''')
    for statement in statements:
        target.execute(statement)
Пример #4
0
def main():
    """Append the distinct tweet authors to the ``users`` table.

    Fetches the rows via ``get_users``, renames the ``user_id`` column to
    ``id``, and appends the result to ``users`` through ``DataFrame.to_sql``
    without writing the index.
    """
    engine = db.get_db_engine()
    users_for_insert = get_users(engine).rename(columns={"user_id": "id"})
    users_for_insert.to_sql(
        'users',
        con=engine,
        if_exists='append',
        index=False,
    )
Пример #5
0
import pandas as pd

from twitpol import config, utils, db

# Directory read from config.PROCESSED; used below as the CSV destination.
processed_dir = config.PROCESSED

# Read a 100-row sample of the TWEETS table through the project's engine.
engine = db.get_db_engine()
data = pd.read_sql('SELECT * FROM TWEETS LIMIT 100', con=engine)

# `processed_dir` is expected to be a pathlib path object (e.g. PosixPath)
# rather than a plain string -- config.PROCESSED is defined elsewhere, so
# confirm there. A path object lets you build paths with the `/` operator
# (a forward slash, not a backslash) on Windows, macOS and Linux alike,
# instead of typing os.path.join(...) every time you want a new path.
data.to_csv(processed_dir / 'example_data.csv')