def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    # NOTE: the docstring above is deliberately left as a one-liner; if this
    # function is registered as a click command it doubles as the help text.
    #
    # Parameters:
    #   db_path       -- passed to utils.open_database() to obtain the database.
    #   identifiers, attach, sql
    #                 -- passed to utils.resolve_identifiers() to build the
    #                    list of tweet IDs to fetch.
    #   auth          -- path to a JSON file holding the credentials handed to
    #                    utils.session_for_auth().
    #   skip_existing -- when true, IDs already present in the "tweets" table
    #                    are dropped before fetching.
    #   silent        -- when true, no progress bar is shown.

    # Use a context manager so the credentials file is closed promptly
    # instead of being left for the garbage collector.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if skip_existing:
        # NOTE(review): this query presumably fails if the "tweets" table has
        # not been created yet -- confirm against utils.open_database().
        existing_ids = {
            row[0] for row in db.conn.execute("select id from tweets").fetchall()
        }
        # Identifiers may be strings, so compare them as integers.
        identifiers = [i for i in identifiers if int(i) not in existing_ids]

    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
    else:
        # Do it with a progress bar
        count = len(identifiers)
        with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
        ) as bar:
            for batch in utils.fetch_status_batches(session, identifiers):
                utils.save_tweets(db, batch)
                bar.update(len(batch))
def _shared_friends_ids_followers_ids(
    db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
):
    """Page through an ID-only endpoint and record rows in the "following" table.

    For every resolved identifier the user's profile is fetched via
    ``utils.get_profile`` and ``api_url`` is paginated with
    ``utils.cursor_paginate``. Each returned ID is inserted into
    ``db["following"]`` as ``{first_key: <profile id>, second_key: <other id>,
    "first_seen": <timestamp>}``; rows that already exist are ignored.

    Args:
        db_path: Passed to ``utils.open_database``.
        identifiers, attach, sql: Passed to ``utils.resolve_identifiers``.
        auth: Path to a JSON credentials file.
        ids: When true, identifiers are treated as user IDs, otherwise as
            screen names.
        sleep: Seconds passed to ``utils.cursor_paginate`` and also slept
            after each identifier has been processed.
        api_url: Endpoint URL handed to ``utils.cursor_paginate``.
        first_key: Column that receives the looked-up user's ID.
        second_key: Column that receives each ID returned by the endpoint.
    """
    # Context manager so the credentials file handle is not leaked.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(db, session, arg_user_id, arg_screen_name)
        user_id = profile["id"]

        args = {("user_id" if ids else "screen_name"): identifier}
        for id_batch in utils.cursor_paginate(
            session, api_url, args, "ids", 5000, sleep
        ):
            # One timestamp per batch. utcnow() is kept on purpose: a
            # timezone-aware datetime would change the stored ISO string.
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                (
                    {
                        first_key: user_id,
                        second_key: other_id,
                        "first_seen": first_seen,
                    }
                    for other_id in id_batch
                ),
                ignore=True,
            )
        time.sleep(sleep)
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    "Fetch lists belonging to specified users"
    # NOTE: the docstring above is deliberately left as a one-liner; if this
    # function is registered as a click command it doubles as the help text.
    #
    # Parameters:
    #   db_path  -- passed to utils.open_database().
    #   identifiers, attach, sql
    #            -- passed to utils.resolve_identifiers().
    #   auth     -- path to a JSON credentials file.
    #   ids      -- when true, identifiers are user IDs rather than screen names.
    #   members  -- when true, every fetched list is additionally passed to
    #               utils.fetch_and_save_list().

    # Context manager so the credentials file handle is not leaked.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)

    for index, identifier in enumerate(identifiers):
        if index:
            # Rate limit is one per minute, so pause before every request
            # after the first. (Previously the guarding flag was never
            # cleared, so this pause was unreachable.)
            time.sleep(60)
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                # NOTE(review): rstrip("@") only removes *trailing* "@"
                # characters; if full_name starts with "@", lstrip was
                # presumably intended -- confirm against fetch_and_save_list.
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].rstrip("@")
                )
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    # NOTE: the docstring above is deliberately left as a one-liner; if this
    # function is registered as a click command it doubles as the help text.
    #
    # Parameters:
    #   db_path  -- passed to utils.open_database().
    #   identifiers, attach, sql
    #            -- passed to utils.resolve_identifiers().
    #   auth     -- path to a JSON credentials file.
    #   ids      -- forwarded to utils.fetch_user_batches().

    # Context manager so the credentials file handle is not leaked.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
def _shared_friends_followers(
    db_path, identifiers, attach, sql, auth, ids, silent, noun
):
    """Fetch and save the friends or followers of the given users.

    For each identifier the profile is fetched with ``utils.get_profile``,
    then the chunks yielded by ``utils.fetch_user_list_chunks`` are written
    with ``utils.save_users``. When no identifiers are resolved, the profile
    returned by ``utils.get_profile(db, session)`` is used instead.

    Args:
        db_path: Passed to ``utils.open_database``.
        identifiers, attach, sql: Passed to ``utils.resolve_identifiers``.
        auth: Path to a JSON credentials file.
        ids: When true, identifiers are user IDs rather than screen names.
        silent: When true, no progress bar is shown.
        noun: Either ``"friends"`` or ``"followers"``.
    """
    assert noun in ("friends", "followers")
    # Context manager so the credentials file handle is not leaked.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]
        # The fallback identifier is a screen name, so it must not be sent
        # as a user_id even if the caller asked for ID mode.
        ids = False

    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]

        # Tell save_users which side of the relationship this user is on.
        save_users_kwargs = {}
        if noun == "followers":
            save_users_kwargs["followed_id"] = user_id
        elif noun == "friends":
            save_users_kwargs["follower_id"] = user_id

        def import_chunks(report_progress):
            # Save each chunk as it arrives; nothing is accumulated in memory.
            for users_chunk in utils.fetch_user_list_chunks(
                session, user_id, screen_name, noun=noun
            ):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                report_progress(len(users_chunk))

        if not silent:
            # The profile's count for this noun is the progress bar length.
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                length=count,
                label="Importing {:,} {} for @{}".format(count, noun, screen_name),
            ) as bar:
                import_chunks(bar.update)
        else:
            import_chunks(lambda x: None)
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    # NOTE: the docstring above is deliberately left as a one-liner; if this
    # function is registered as a click command it doubles as the help text.
    #
    # Parameters:
    #   db_path  -- passed to utils.open_database().
    #   identifiers, attach, sql
    #            -- passed to utils.resolve_identifiers().
    #   ids      -- when true, identifiers are already user IDs; otherwise
    #               they are converted with utils.user_ids_for_screen_names().
    #   auth     -- path to a JSON credentials file.
    #   verbose  -- when true, each streamed tweet is printed as indented JSON.

    # Context manager so the credentials file handle is not leaked.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)

    # Ensure we have user IDs, not screen names. The local is named
    # follow_ids so it does not shadow this function's own name.
    if ids:
        follow_ids = identifiers
    else:
        follow_ids = utils.user_ids_for_screen_names(db, identifiers)

    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow_ids):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Save each tweet inside its own "with db.conn" block.
        with db.conn:
            utils.save_tweets(db, [tweet])
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    # NOTE: the docstring above is deliberately left as a one-liner; if this
    # function is registered as a click command it doubles as the help text.
    #
    # Parameters:
    #   db_path     -- passed to utils.open_database().
    #   identifiers, attach, sql
    #               -- passed to utils.resolve_identifiers().
    #   auth        -- path to a JSON credentials file.
    #   ids         -- when true, identifiers are user IDs, not screen names.
    #   stop_after, since, since_id
    #               -- forwarded to utils.fetch_user_timeline().
    #   user_id, screen_name
    #               -- legacy single-user options merged into identifiers.
    #
    # Raises click.ClickException when screen_name is combined with ids, or
    # when user_id is combined with other identifiers without ids.

    # Context manager so the credentials file handle is not leaked.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # A lone user_id must be looked up as an ID; without this it
            # would be sent to the API as a screen name.
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every label to the width of the longest identifier.
    label_width = max(len(str(identifier)) for identifier in identifiers)
    format_string = "@{:" + str(label_width) + "}"

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Profile was already fetched above; re-read the stored row.
            profile = db["users"].get(profile["id"])

        expected_length = profile["statuses_count"]
        if since or since_id:
            # A partial fetch has no known total, so give the bar no length.
            expected_length = None

        with click.progressbar(
            utils.fetch_user_timeline(
                session,
                db,
                stop_after=stop_after,
                since_id=since_id,
                since=since,
                **kwargs
            ),
            length=expected_length,
            label=format_string.format(profile["screen_name"]),
            show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            # Flush whatever is left over after the final full chunk.
            if chunk:
                utils.save_tweets(db, chunk)