예제 #1
0
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing,
                    silent):
    "Fetch tweets by their IDs"
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text by a decorator outside this view -- confirm before editing.
    #
    # Parameters:
    #   db_path        handed to utils.open_database.
    #   identifiers,
    #   attach, sql    handed to utils.resolve_identifiers, which yields the
    #                  tweet IDs to fetch.
    #   auth           path to a JSON file; its parsed content is handed to
    #                  utils.session_for_auth.
    #   skip_existing  when truthy, IDs already present in the "tweets" table
    #                  are dropped before fetching.
    #   silent         when truthy, no progress bar is drawn.

    # Read the credentials inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if skip_existing:
        existing_ids = {
            row[0]
            for row in db.conn.execute("select id from tweets").fetchall()
        }
        # Identifiers are converted with int() so they compare equal to the
        # id values read back from the table.
        identifiers = [i for i in identifiers if int(i) not in existing_ids]

    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
        return

    # Do it with a progress bar
    count = len(identifiers)
    with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(
                count, "" if count == 1 else "s"),
    ) as bar:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
            bar.update(len(batch))
예제 #2
0
def _shared_friends_ids_followers_ids(db_path, identifiers, attach, sql, auth,
                                      ids, sleep, api_url, first_key,
                                      second_key):
    """Record ID-to-ID relationships for each resolved account.

    For every identifier the account's profile is fetched (and thereby
    saved) via ``utils.get_profile``, then ``api_url`` is paginated with
    ``utils.cursor_paginate`` and each returned ID is inserted into the
    ``following`` table as a row ``{first_key: <account id>,
    second_key: <returned id>, "first_seen": <UTC timestamp>}``.
    Inserts use ``ignore=True``.

    Parameters:
        db_path: handed to ``utils.open_database``.
        identifiers, attach, sql: handed to ``utils.resolve_identifiers``.
        auth: path to a JSON file whose parsed content is handed to
            ``utils.session_for_auth``.
        ids: truthy when the identifiers are user IDs rather than screen
            names.
        sleep: seconds to pause after each account; also forwarded to
            ``utils.cursor_paginate``.
        api_url: endpoint to paginate.
        first_key: column that receives the looked-up account's ID.
        second_key: column that receives each ID returned by the endpoint.
    """
    # Close the credentials file deterministically rather than leaking the
    # handle until garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    arg_name = "user_id" if ids else "screen_name"
    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(db, session, arg_user_id, arg_screen_name)
        user_id = profile["id"]
        args = {arg_name: identifier}
        # NOTE(review): 5000 is presumably the per-request page size --
        # confirm against utils.cursor_paginate.
        for id_batch in utils.cursor_paginate(session, api_url, args, "ids",
                                              5000, sleep):
            # One timestamp per batch: every row of a batch shares it.
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                ({
                    first_key: user_id,
                    second_key: other_id,
                    "first_seen": first_seen
                } for other_id in id_batch),
                ignore=True,
            )
        time.sleep(sleep)
예제 #3
0
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    "Fetch lists belonging to specified users"
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text by a decorator outside this view -- confirm before editing.
    #
    # Parameters:
    #   db_path      handed to utils.open_database.
    #   identifiers,
    #   attach, sql  handed to utils.resolve_identifiers.
    #   auth         path to a JSON file; its parsed content is handed to
    #                utils.session_for_auth.
    #   ids          truthy when identifiers are user IDs, not screen names.
    #   members      when truthy, each fetched list is additionally passed to
    #                utils.fetch_and_save_list.

    # Close the credentials file deterministically rather than leaking the
    # handle until garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)

    for position, identifier in enumerate(identifiers):
        if position:
            # Rate limit is one per minute, so pause before every request
            # after the first. The previous implementation guarded the sleep
            # with a flag that was never cleared, so it never slept at all.
            time.sleep(60)
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                # NOTE(review): rstrip("@") only removes a trailing "@"; if
                # full_name carries a leading "@", lstrip may have been
                # intended -- confirm against utils.fetch_and_save_list.
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].rstrip("@")
                )
예제 #4
0
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text by a decorator outside this view -- confirm before editing.
    #
    # Parameters:
    #   db_path      handed to utils.open_database.
    #   identifiers,
    #   attach, sql  handed to utils.resolve_identifiers.
    #   auth         path to a JSON file; its parsed content is handed to
    #                utils.session_for_auth.
    #   ids          forwarded to utils.fetch_user_batches; truthy when the
    #                identifiers are user IDs rather than screen names.

    # Close the credentials file deterministically rather than leaking the
    # handle until garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
예제 #5
0
def _shared_friends_followers(db_path, identifiers, attach, sql, auth, ids,
                              silent, noun):
    """Fetch and save the friends or followers of each resolved account.

    When no identifiers resolve, the profile returned by
    ``utils.get_profile(db, session)`` is used instead. For each account
    the user list is streamed in chunks from
    ``utils.fetch_user_list_chunks`` and every chunk is stored with
    ``utils.save_users``, tagged with ``followed_id`` (for followers) or
    ``follower_id`` (for friends) set to the account's ID.

    Parameters:
        db_path: handed to ``utils.open_database``.
        identifiers, attach, sql: handed to ``utils.resolve_identifiers``.
        auth: path to a JSON file whose parsed content is handed to
            ``utils.session_for_auth``.
        ids: truthy when the identifiers are user IDs rather than screen
            names.
        silent: when truthy, no progress bar is drawn.
        noun: either ``"friends"`` or ``"followers"``.

    Raises:
        AssertionError: if ``noun`` is neither of the two accepted values.
    """
    assert noun in ("friends", "followers")
    # Close the credentials file deterministically rather than leaking the
    # handle until garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)

    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]

    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]

        save_users_kwargs = {}
        if noun == "followers":
            save_users_kwargs["followed_id"] = user_id
        elif noun == "friends":
            save_users_kwargs["follower_id"] = user_id

        def go(update):
            # Chunks are saved as they arrive and not retained: the previous
            # version also appended every user to a list that was never read,
            # which only grew memory for large accounts.
            for users_chunk in utils.fetch_user_list_chunks(session,
                                                            user_id,
                                                            screen_name,
                                                            noun=noun):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                update(len(users_chunk))

        if silent:
            go(lambda x: None)
        else:
            # Get the follower count, so we can have a progress bar
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                    length=count,
                    label="Importing {:,} {} for @{}".format(
                        count, noun, screen_name),
            ) as bar:
                go(bar.update)
예제 #6
0
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text by a decorator outside this view -- confirm before editing.
    #
    # Parameters:
    #   db_path      handed to utils.open_database.
    #   identifiers,
    #   attach, sql  handed to utils.resolve_identifiers.
    #   ids          truthy when identifiers are user IDs, not screen names.
    #   auth         path to a JSON file; its parsed content is handed to
    #                utils.session_for_auth.
    #   verbose      when truthy, each received tweet is printed as indented
    #                JSON before being saved.

    # Close the credentials file deterministically rather than leaking the
    # handle until garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # Ensure we have user IDs, not screen names. The local is named
    # follow_ids so it does not shadow this function's own name.
    if ids:
        follow_ids = identifiers
    else:
        follow_ids = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow_ids):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own "with db.conn" block.
        with db.conn:
            utils.save_tweets(db, [tweet])
예제 #7
0
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text by a decorator outside this view -- confirm before editing.
    #
    # Parameters:
    #   db_path      handed to utils.open_database.
    #   identifiers,
    #   attach, sql  handed to utils.resolve_identifiers.
    #   auth         path to a JSON file; its parsed content is handed to
    #                utils.session_for_auth.
    #   ids          truthy when identifiers are user IDs, not screen names.
    #   stop_after,
    #   since,
    #   since_id     forwarded to utils.fetch_user_timeline; when since or
    #                since_id is set the progress bar has no expected length.
    #   user_id,
    #   screen_name  legacy single-account options merged into identifiers.
    #
    # Raises click.ClickException when screen_name is combined with ids, or
    # when user_id is combined with other identifiers without ids.

    # Close the credentials file deterministically rather than leaking the
    # handle until garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # The only identifier is a user ID, so it must be looked up as
            # one. Previously ids stayed falsy here and the ID was sent as a
            # screen_name.
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every label to the longest identifier so the progress bars align.
    label_width = max(len(str(identifier)) for identifier in identifiers)
    format_string = "@{:" + str(label_width) + "}"

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # The profile was already fetched above; re-read the stored row.
            profile = db["users"].get(profile["id"])
        expected_length = profile["statuses_count"]

        if since or since_id:
            # Only part of the timeline is requested, so the total is unknown.
            expected_length = None

        with click.progressbar(
                utils.fetch_user_timeline(session,
                                          db,
                                          stop_after=stop_after,
                                          since_id=since_id,
                                          since=since,
                                          **kwargs),
                length=expected_length,
                label=format_string.format(profile["screen_name"]),
                show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            # Flush whatever is left over after the last full chunk.
            if chunk:
                utils.save_tweets(db, chunk)