def harvest_single_user(
    maintask: MainTask,
    api: tweepy.API,
    doc: cloudant.document,
    db: DBHelper,
) -> bool:
    """Fetch a user's tweets newer than the stored marker and record progress.

    Calls ``api.user_timeline`` repeatedly, each time asking for ids below
    the smallest id of the previous page, until an empty page comes back.
    Every status' ``_json`` payload is handed to ``db.add_tweet``.

    Args:
        maintask: Object whose ``log(*args)`` method receives progress and
            error messages.
        api: Client exposing ``user_timeline(**kwargs)``.
        doc: Mapping-like user document providing ``_id`` and
            ``last_harvest_tweet_id``; updated and persisted via ``save()``
            when the harvest completes.
        db: Store exposing ``add_tweet(tweet_json)``.

    Returns:
        ``True`` when paging finished and ``doc`` was saved; ``False`` when
        an API call raised (``doc`` is left untouched in that case).
    """
    newest_seen = 0
    page_floor = None  # smallest tweet id seen on the previous page
    harvested = 0

    while True:
        # NOTE(review): Twitter's docs describe since_id as exclusive; if so,
        # the +1 would skip an id exactly one above the stored marker.
        # Confirm intent before changing.
        request = {
            "user_id": doc["_id"],
            "since_id": int(doc["last_harvest_tweet_id"]) + 1,
            "include_rts": "false",
        }
        if page_floor is not None:
            # Ask only for tweets strictly older than the previous page.
            request["max_id"] = str(page_floor - 1)

        try:
            page = api.user_timeline(**request)
        except Exception as e:
            # Bail out without touching doc: tweets from earlier pages were
            # already passed to db.add_tweet, but the marker is not advanced.
            maintask.log("user tweets: twitter api error, backoff", e)
            return False

        if not page:
            break

        page_ids = []
        for status in page:
            payload = status._json
            tweet_id = int(payload["id_str"])
            page_ids.append(tweet_id)
            db.add_tweet(payload)

        lowest, highest = min(page_ids), max(page_ids)
        harvested += len(page_ids)
        newest_seen = max(newest_seen, highest)
        page_floor = lowest
        maintask.log("user tweets: ids from ", lowest, "to", highest)

    # Never move the marker backwards, even if nothing new was fetched.
    previous_marker = int(doc["last_harvest_tweet_id"])
    doc["last_harvest_tweet_id"] = str(max(newest_seen, previous_marker))
    doc["last_harvest"] = int(time.time())
    doc.save()
    maintask.log("user tweets: got tweets", harvested)
    return True
def _handle_one_tweet(db: DBHelper, t_json: Dict[str, Any]): db.add_tweet(t_json) db.add_user(t_json["user"]["id_str"], t_json["user"]["screen_name"])