def auth_ua(creds_path):
    # app auth can reportedly get 4x as many results, according to:
    #   https://github.com/DocNow/twarc/issues/323
    if creds_path == "":
        t_user = Twarc(app_auth=False)
        t_app = Twarc(app_auth=True)
    else:
        creds = tutils.retrieve_creds(creds_path)
        t_user = Twarc(creds[0], creds[1], creds[2], creds[3], app_auth=False)
        t_app = Twarc(creds[0], creds[1], creds[2], creds[3], app_auth=True)

    return t_user, t_app
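A hedged usage sketch (the empty-string path falls back to keys already configured for twarc; tutils.retrieve_creds is this project's own helper):

# t_user, t_app = auth_ua("")            # use twarc's own configured keys
# t_user, t_app = auth_ua("creds.txt")   # or load keys via tutils.retrieve_creds
# App-auth searches get higher rate limits, so prefer t_app for bulk search.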
Example #2
    def __init__(self, creds=[], neo4j_creds=None, TWEETS_PER_PROCESS=100,
                 TWEETS_PER_ROWGROUP=5000, save_to_neo=False,
                 PARQUET_SAMPLE_RATE_TIME_S=None, debug=False, BATCH_LEN=100,
                 writers={'snappy': None}):
        self.queue = deque()
        self.writers = writers
        self.last_write_epoch = ''
        self.current_table = None
        self.schema = pa.schema([
            (name, t)
            for (i, name, t) in KNOWN_FIELDS
        ])
        self.timer = Timer()
        self.debug = debug

        self.twarc_pool = TwarcPool([
            Twarc(o['consumer_key'], o['consumer_secret'], o['access_token'], o['access_token_secret'])
            for o in creds
        ])
        self.save_to_neo = save_to_neo
        self.TWEETS_PER_PROCESS = TWEETS_PER_PROCESS  # default 100
        self.TWEETS_PER_ROWGROUP = TWEETS_PER_ROWGROUP  # 1 KB x 1000 tweets = 1 MB of uncompressed parquet
        self.PARQUET_SAMPLE_RATE_TIME_S = PARQUET_SAMPLE_RATE_TIME_S
        self.last_df = None
        self.last_arr = None
        self.last_write_arr = None
        self.last_writes_arr = []

        self.neo4j_creds = neo4j_creds

        self.BATCH_LEN = BATCH_LEN

        self.needs_to_flush = False

        self.__file_names = []
Example #3
 def add_users_by_screen_names(self, screen_names):
     if 'keys' not in self:
         raise CollectionConfigException(
             'Keys are required to add users by screen name.')
     keys = self['keys']
     twarc = Twarc(keys['consumer_key'], keys['consumer_secret'],
                   keys['access_token'], keys['access_token_secret'])
     # Lower case to original case
     screen_name_case_map = {}
     for screen_name in screen_names:
         clean_screen_name = screen_name.lstrip('@')
         if clean_screen_name:
             screen_name_case_map[
                 clean_screen_name.lower()] = clean_screen_name
     if 'users' not in self:
         self['users'] = {}
     delete_users = []
     for user in twarc.user_lookup(screen_name_case_map.keys(),
                                   id_type='screen_name'):
         if user['id_str'] not in self['users']:
             self['users'][user['id_str']] = {
                 'screen_name': user['screen_name']
             }
         delete_users.append(user['screen_name'].lower())
     for screen_name in delete_users:
         del screen_name_case_map[screen_name]
     return screen_name_case_map.values()
Example #4
    def dehydrate(self, tweet_ids: List[str]):
        t = Twarc(self.configuration["twitter"]["consumer_key"],
                  self.configuration["twitter"]["consumer_secret"],
                  self.configuration["twitter"]["access_token"],
                  self.configuration["twitter"]["access_token_secret"],
                  tweet_mode="extended")
        count: int = 0
        print("Reading tweets from Twitter")
        with tqdm(total=self.configuration["sampling"]["size"],
                  unit="tweet") as written_progress_bar:
            with tqdm(total=len(tweet_ids),
                      unit="tweet") as hydrate_progress_bar:
                for tweet in t.hydrate(tweet_ids):
                    hydrate_progress_bar.update(1)
                    if any(keyword in tweet["full_text"].lower() for keyword in
                           self.configuration["sampling"]["keywords"]):
                        append: bool = True

                        if "only_media" in self.configuration["sampling"].keys(
                        ):
                            if self.configuration["sampling"]["only_media"]:
                                if not self.contains_media(tweet):
                                    append = False

                        if len(self.configuration["sampling"]
                               ["languages"]) > 0:
                            if tweet["lang"] not in self.configuration[
                                    "sampling"]["languages"]:
                                append = False
                        if append:
                            written_progress_bar.update(1)
                            count += 1
                            yield tweet
                        if count == self.configuration["sampling"]["size"]:
                            return
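A minimal driver sketch for the generator above, assuming `sampler` is an instance of this class with its configuration loaded (the function name and output path are hypothetical):

import json

def write_sample(sampler, tweet_ids, out_path="sample.jsonl"):
    # Stream the filtered, hydrated tweets to a JSONL file, one tweet per line.
    with open(out_path, "w", encoding="utf8") as fp:
        for tweet in sampler.dehydrate(tweet_ids):
            fp.write(json.dumps(tweet) + "\n")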
Example #5
    def __init__(self,
                 consumer_key,
                 consumer_secret,
                 access_token_key="",
                 access_token_secret=""):
        """
        This method authenticates and creates a twitterapi object.
        In case the system is unable to authenticate the object, a SystemError is returned.
        """
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        try:
            auth.get_authorization_url()
        except TweepError as e:
            print("Unable to authenticate", str(e))
            raise ApplicationError(*error_list["AUTH_ERROR"])

        auth.set_access_token(access_token_key, access_token_secret)
        self._api = tweepy.API(auth_handler=auth)
        try:
            self._api_twarc = Twarc(cnst.CONSUMER_KEY, cnst.CONSUMER_SECRET,
                                    cnst.ACCESS_TOKEN_KEY,
                                    cnst.ACCESS_TOKEN_SECRET)
        except Exception as e:
            print("Unable to authenticate", str(e))
            raise ApplicationError(*error_list["AUTH_ERROR"])
def get_account(item):
    """
	Uses the Twarc libtrary to surface all the tweet twarc can see via a twitter username
	Searches for media in all tweets - if it can find any it also tries to download that media item
	"""
    item.agent_name = agent_name + "_1_get_account"
    if not os.path.exists(item.storage_folder):
        os.makedirs(item.storage_folder)
    t = Twarc(twitter_consumer_key, twitter_consumer_secret,
              twitter_access_token, twitter_access_token_secret)
    name = item.url.strip().replace("https://twitter.com/",
                                    "").replace("?", "")
    file_path = os.path.join(
        item.storage_folder,
        "{}_{}.json".format(time.strftime("%d-%m-%Y_%H-%M-%S"), name))
    tweets = []
    for tweet in t.timeline(screen_name=name):
        tweets.append(tweet)
    tweets = filter_tweets_by_start_date(tweets, item.date_range)
    for tweet in tweets:
        get_assets(tweet, item.storage_folder)
    with open(file_path, "w") as outfile:
        json.dump(tweets, outfile)
    item.completed = True
    return item
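For orientation, a hedged sketch of the `item` contract the two agent functions above expect (field names are inferred from the function bodies; the HarvestItem class itself is hypothetical):

from dataclasses import dataclass

@dataclass
class HarvestItem:
    url: str                 # e.g. a twitter.com profile or status URL
    storage_folder: str      # directory where JSON and media are written
    date_range: tuple = ()   # consumed by filter_tweets_by_start_date
    agent_name: str = ""     # set by the agent that processed the item
    completed: bool = False  # flipped to True once harvesting finishes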
def get_tweet(item):
    """
	takes a tweet id and uses the twarc lib to harvest it
	searches for media in the tweet - if it can find any it also tries to download that media item
	"""
    item.agent_name = agent_name + "_1_get_tweet"
    if not os.path.exists(item.storage_folder):
        os.makedirs(item.storage_folder)
    my_content_types = []
    url = item.url
    if url.endswith("/"):
        url = url[:-1]
    __, __id = url.rsplit("/", 1)

    t = Twarc(twitter_consumer_key, twitter_consumer_secret,
              twitter_access_token, twitter_access_token_secret)
    for tweet in t.hydrate([__id]):
        get_assets(tweet, item.storage_folder)
        file_path = os.path.join(
            item.storage_folder,
            "{}_{}.json".format(time.strftime("%d-%m-%Y_%H-%M-%S"),
                                tweet['id']))
        with open(file_path, "w") as outfile:
            json.dump(tweet, outfile)
    item.completed = True
    return item
Example #8
def collect_timelines(input_file, output_file, credentials_file):
    with open(credentials_file) as fp:
        credentials = tuple(map(str.strip, fp.readlines()))
    twarc_obj = Twarc(*credentials)
    df = pd.read_csv(input_file, sep="\t")
    with open(output_file, "w+") as fp:
        total = 0
        found_users = 0
        pbar = tqdm.tqdm(df.values)
        for uid, tid, u_statuses in pbar:
            found = 0
            pbar.set_description("User {}".format(uid))
            try:
                for tweet_json in twarc_obj.timeline(user_id="{}".format(uid)):
                    found += 1
                    if found > 190:
                        break
                    total += 1
                    print(json.dumps(tweet_json), file=fp)
                    pbar.set_postfix(found=found_users + 1, total=total)
            except requests.exceptions.HTTPError as e:
                pbar.write("Error for uid={}. {}".format(uid, e))
            else:
                found_users += 1
        pbar.close()
    print("Collected {} tweets.".format(total))
Example #9
 def _create_twarc(self):
     self.twarc = Twarc(self.message["credentials"]["consumer_key"],
                        self.message["credentials"]["consumer_secret"],
                        self.message["credentials"]["access_token"],
                        self.message["credentials"]["access_token_secret"],
                        http_errors=self.http_errors,
                        connection_errors=self.connection_errors)
Example #10
def stream_city(cf, city, keywords=None):
    bbox = {
        "great_syd": [149.971885992, -34.33117400499998, 151.63054702400007, -32.99606922499993],
        "great_mel": [144.33363404800002, -38.50298801599996, 145.8784120140001, -37.17509899299995],
        "great_brisbane": [152.07339276400012, -28.363962911999977, 153.54670756200005, -26.452339004999942],
        "great_ald": [138.435645001, -35.350296029999974, 139.04403010400003, -34.50022530299998]
    }
    
    if keywords is None:
        keywords = cf["search_words"]
    
    t = Twarc(**cf['account'])

    # no keyword restriction but from a specific city
    # reason see this https://stackoverflow.com/questions/22889122/how-to-add-a-location-filter-to-tweepy-module
    if not os.path.isdir(city+"/"):
        os.makedirs(city)
    
    path = city + "/" + str(datetime.date.today())+".jsonl"

    locations = ",".join([str(i) for i in bbox[city]])

    for tweet in t.filter(locations=locations):
        print("get one tweet") #TODO
        send_to_db(tweet)
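Because the streaming API treats locations and track terms as an OR (see the Stack Overflow link above), any keyword restriction has to happen client-side; a post-filter sketch (the field fallbacks are assumptions about the tweet payload):

def matches_keywords(tweet, keywords):
    # Check the longest text variant available on the payload.
    text = (tweet.get("extended_tweet", {}).get("full_text")
            or tweet.get("full_text")
            or tweet.get("text", "")).lower()
    return any(k.lower() in text for k in keywords)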
Example #11
 def collect_replies(self):
     """
     Collect replies for all tweets from query using twarc
     :return:
     """
     twarc = Twarc(self.ak, self.aks, self.at, self.ats)
     reply_count = 0
     # loop through all parent tweets from query
     for tweet in self.tweets:
         replies = []
         reps = twarc.replies(
             self.tweepy_to_twarc(tweet),
             recursive=False)  # get iterator for replies from twarc
         next(reps)  # the first item is the parent tweet, so skip it
         i = 0
         # max 30 replies
         while i < 30:
             try:
                 rep = next(reps)  # get next reply and add it to list
                 replies.append(rep)
                 i = i + 1
             except StopIteration:
                 break
             except Exception as e:
                 print('error: ', e)
         self.dict[tweet.id] = replies  # add tweet to dict {id:replies}
         reply_count += len(replies)
     print(reply_count, ' replies were collected')
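The capped while-loop above can also be written with itertools.islice; a sketch (note it does not replicate the original's per-reply exception handling):

import itertools

def take_replies(reps, limit=30):
    next(reps, None)  # skip the parent tweet
    return list(itertools.islice(reps, limit))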
Example #12
def pull_tweet(input_file_name):
    CONSUMER_KEY = "9At2u3Y2DraTHLSg3D9w6LhE9"
    CONSUMER_KEY_SECRET = "DRFCbI2t0gMhfV2KnEub6cljowW9zRwmkeMJ0GT9MlMkrkzspM"
    ACCESS_TOKEN = "1259913765614751745-LwtSI48si3sYekzvxW86syIFsRgirl"
    ACCESS_TOKEN_SECRET = "e0gpJdT0IXOSxFrhplKMl8FlP0dVnuLg1vwBHzt5Fc9J9"

    t = Twarc(CONSUMER_KEY, CONSUMER_KEY_SECRET, ACCESS_TOKEN,
              ACCESS_TOKEN_SECRET)
    inputF = open(input_file_name, "r")
    line = inputF.readline()
    data = []
    i = 0
    while line != "" and i < 10:
        try:
            tweet = t.tweet(line.strip())
            if tweet["lang"] == "en":
                if 'retweeted_status' in tweet.keys():
                    data.append(tweet['retweeted_status']['full_text'].replace(
                        '\n', ' '))
                else:
                    data.append(tweet['full_text'].replace('\n', ' '))
                i += 1
            line = inputF.readline()
        except Exception:
            # skip ids that fail to hydrate (e.g. deleted or protected tweets)
            line = inputF.readline()
    inputF.close()
    return data
Example #13
def authorize():
    """
    Return authorized Twarc handler with the credentials stored in config file.
    """
    config = configuration()['twitter']
    twarc_auth = Twarc(config['consumer_key'], config['consumer_secret'],
                       config['access_token'], config['access_token_secret'])
    return twarc_auth
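A hedged usage note (the configuration() helper above is assumed to return the parsed config file):

# t = authorize()
# for tweet in t.search("some query"):   # hypothetical query
#     process(tweet)                     # hypothetical handler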
Example #14
def get_twitter_keys(profile=None, twarc_config=None):
    twarc = Twarc(config=twarc_config, profile=profile)
    return {
        'consumer_key': twarc.consumer_key,
        'consumer_secret': twarc.consumer_secret,
        'access_token': twarc.access_token,
        'access_token_secret': twarc.access_token_secret
    }
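Since these keys come from twarc's own config, the returned dict can be splatted straight back into a client; a sketch (the profile name is hypothetical):

# keys = get_twitter_keys(profile="main")
# t = Twarc(**keys)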
Example #15
 def _create_twarc(self):
     return Twarc(self.config["keys"]["consumer_key"],
                  self.config["keys"]["consumer_secret"],
                  self.config["keys"]["access_token"],
                  self.config["keys"]["access_token_secret"],
                  http_errors=self.http_errors,
                  connection_errors=self.connection_errors,
                  tweet_mode="extended")
def main():
    """
    Main program
    """
    # argument check
    if len(sys.argv) > 1:
        # if argument file exists
        if os.access(sys.argv[1], os.R_OK):
            input_file = sys.argv[1]
        else:
            sys.stderr.write("ERROR, NEED VALID FILE\n")
            sys.exit(1)
    else:
        sys.stderr.write("ERROR, NEED FILE\n")
        sys.exit(1)

    # check if data folder exists or create it
    if not os.path.isdir("data"):
        os.makedirs("data")

    # keep running stream function (every hour)
    while True:

        # string of streaming words
        print "Starting"
        keys = ""
        lines = []
        projects = []

        # open file for read
        with open(input_file, "r") as fr:
            string_txt = fr.read()
            projects = json.loads(string_txt)
            #for line in fr:
            #    # empty line
            #    if line != '\n':
            #        # remove white chars in start and end of line
            #        line = line.rstrip('\n\t ')
            #        line = line.strip('\t ')
            #        # append line to array and string
            #        keywords = line.split("::")[1:]
            #        project_id = line.split("::")[0]
            #        keys = keys + ",".join(keywords) + ","
            #        project = {"id":project_id, "name": keywords[0], "keywords":keywords}
            #        projects.append(project)
        for project in projects:
            keys += ",".join(project["synonyms"]) + ","

        print("Projects %s" % str(projects))
        keys = keys.rstrip(",")

        # create Twarc class
        t = Twarc(client_key, client_secret, access_token, access_token_secret)

        # call stream function every hour
        if not stream(keys, projects, t):
            sys.stderr.write("ERROR, STREAM QUITS\n")
            sys.exit(1)
Example #17
def test_search():
    count = 0
    t = Twarc()
    for tweet in t.search('obama'):
        assert tweet['id_str']
        count += 1
        if count == 10:
            break
    assert count == 10
Example #18
def test_since_id():
    t = Twarc()
    for tweet in t.search('obama'):
        id = tweet['id_str']
        break
    assert id
    time.sleep(5)
    for tweet in t.search('obama', since_id=id):
        assert tweet['id_str'] > id
Example #19
    def __init__(self, secretsfile='/Users/sara/twittersecrets.txt'):

        with open(secretsfile, 'r') as fsecret:
            secrets = fsecret.readline()
        access_token, access_token_secret, consumer_key, consumer_secret = \
            [x.strip() for x in secrets.split(',')]

        self.twarc = Twarc(consumer_key, consumer_secret, access_token,
                           access_token_secret)
def get_interactions(consumer_key, consumer_secret, access_token, access_token_secret):
    """
    Arguments are Twitter API credentials. To get them you can go here http://apps.twitter.com/.
    Saves pickled lists of tweet authors and users they mention, and a list of users considered.
    """
    from twarc import Twarc
    from tqdm import tqdm
    import pickle

    t = Twarc(consumer_key,
            consumer_secret,
            access_token,
            access_token_secret)

    list_ids = ["1335885096063295488",
                "1288082572195639296",
                "1287444819015618561",
                "1283739792702713856",
                "1081734288368898048",
                "910757441855459328",
                "193445218",
                "90205656",
                "85315110"]

    users = set([m['screen_name'] for lid in list_ids for m in t.list_members(lid)])

    users_to_exclude = ['premierleague',
                        'SpursOfficial',
                        'Arsenal',
                        'ManCity',
                        'sterling7',
                        'kylewalker2',
                        'HKane',
                        'benmendy23',
                        'dele_official',
                        'RobHolding95',
                        'm8arteta']

    users.difference_update(users_to_exclude)

    authors = []
    mentions = []

    for user in tqdm(users):
        tl = t.timeline(screen_name=user)
        tweets = [tt for tt in tl]
        m = [u['screen_name'] for tw in tweets for u in tw['entities']['user_mentions']]
        a = [user] * len(m)
        mentions.append(m)
        authors.append(a)

    flat_a = [item for sublist in authors for item in sublist]
    flat_m = [item for sublist in mentions for item in sublist]

    pickle.dump(flat_a, open('authors.p', 'wb'))
    pickle.dump(flat_m, open('mentions.p', 'wb'))
    pickle.dump(users, open('users.p', 'wb'))
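A hedged follow-up sketch showing how the pickled author/mention lists might be aggregated into weighted edges (the Counter aggregation is an illustration, not part of the original script):

import pickle
from collections import Counter

def mention_edge_counts(authors_path='authors.p', mentions_path='mentions.p'):
    with open(authors_path, 'rb') as fa, open(mentions_path, 'rb') as fm:
        authors, mentions = pickle.load(fa), pickle.load(fm)
    # (author, mentioned_user) -> number of mentions
    return Counter(zip(authors, mentions))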
Example #21
    def __init__(self, search_terms):

        logging.info("initializing TwitterStream Kafka")

        # globals to all instances
        self.t = Twarc(localConfig.client_key, localConfig.client_secret,
                       localConfig.access_token,
                       localConfig.access_token_secret)
        self.search_terms = search_terms
Example #22
def test_paging():
    # pages are 100 tweets big so if we can get 500 paging is working
    t = Twarc()
    count = 0
    for tweet in t.search('obama'):
        count += 1
        if count == 500:
            break
    assert count == 500
def main():
    if len(sys.argv) > 2:
        screen_name = sys.argv[1]
        keyword = sys.argv[2]

        t = Twarc(consumer_key, consumer_secret, access_token, access_secret)
        follower_ids = get_followers_id(t, screen_name)
        get_screen_name(t, screen_name, follower_ids, keyword)
    else:
        print('Usage: python poc.py fs0c131y chowkidar')
Example #24
def test_stream():
    t = Twarc()
    count = 0
    for tweet in t.stream("obama"):
        assert tweet['id_str']
        assert tweet['text']
        count += 1
        if count == 50:
            break
    assert count == 50
Example #25
def main(get_method=None, input_hashtags=None, storage_location=None):
    if not os.path.exists(storage_location):
        os.makedirs(storage_location, exist_ok=True)

    hashtag_query = input_hashtags.strip().replace(",", "+OR+")

    try:
        tweets = 0
        t = Twarc(
            consumer_key,
            consumer_secret,
            access_token,
            access_token_secret,
            tweet_mode="extended",
        )

        print(
            "Started storing tweets related to "
            + input_hashtags
            + " at "
            + storage_location
            + " since "
            + str(datetime.datetime.now())
        )

        if get_method == "populate":
            for tweet in t.search(hashtag_query, lang=language):
                with open(
                    os.path.join(
                        storage_location, "tweet" + str(tweet["id"]) + ".json"
                    ),
                    "w",
                    encoding="utf8",
                ) as file:
                    json.dump(tweet, file)
                    tweets += 1

        elif get_method == "track":
            for tweet in t.filter(hashtag_query):
                with open(
                    storage_location + "/tweet" + str(tweet["id"]) + ".json",
                    "w",
                    encoding="utf8",
                ) as file:
                    json.dump(tweet, file)
                    tweets += 1
        else:
            print("No method defined, exiting...")

    except KeyboardInterrupt:
        print("Shutdown requested...successfully stored " + str(tweets) + " tweets")
    except BaseException:
        traceback.print_exc(file=sys.stdout)

    sys.exit(0)
Example #26
    def get_training_data(self):
        '''
        :return: combined data (tweets info and trec-is data) as dictionary {tweet_id: Tweet}
        '''
        # load tweets retrieved by TREC-Tweets downloader
        # retrieved_tweets, f_name = self.load_Tweets()
        #retrieved_tweets, f_name = self.load_event_tweets()
        with open('data/all_tweets.pkl', 'rb') as f:
            retrieved_tweets = pickle.load(f)

        missed_tweets = []
        training_data = {}  # dict {'tweet id': Tweet}

        # load TREC data: tweetsID, tweet_priority, tweet_categories, indicator_terms
        events = json.load(open(self.trec_path))
        events = pd.DataFrame.from_dict(events['events'], orient='columns')

        for _, event in events.iterrows():

            for trec_tweet in event['tweets']:
                if trec_tweet['postID'] in retrieved_tweets:  # check whether the full tweet was retrieved
                    retriev_tweet = retrieved_tweets[trec_tweet['postID']]
                    training_data[trec_tweet['postID']] = Tweet(
                        id=retriev_tweet.id,
                        text=retriev_tweet.text,
                        metadata=retriev_tweet.metadata,
                        priority=trec_tweet['priority'],
                        indicatorTerms=trec_tweet['indicatorTerms'],
                        categories=trec_tweet['categories'],
                        event_type=trec_tweet['event_type'])
                else:
                    # adding missed tweets
                    training_data[trec_tweet['postID']] = Tweet(
                        id=trec_tweet['postID'],
                        priority=trec_tweet['priority'],
                        indicatorTerms=trec_tweet['indicatorTerms'],
                        categories=trec_tweet['categories'],
                        event_type=trec_tweet['event_type'])
                    missed_tweets.append(trec_tweet['postID'])

        # Retrieve the missed tweets by Twarc tool and combine with training data
        t = Twarc(self.consumer_key, self.consumer_secret, self.access_token,
                  self.access_token_secret)

        tweets_twarc = t.hydrate(iter(missed_tweets))  # retrieve all missed tweets by ID

        for twtt in tweets_twarc:
            training_data[str(twtt['id'])].add_tweets_data(
                twtt['full_text'], {'created_at': twtt['created_at']})

        return training_data
Example #27
def main():
    """
	Main program
	"""
    # argument check
    if len(sys.argv) > 1:
        # if argument file exists
        if os.access(sys.argv[1], os.R_OK):
            input_file = sys.argv[1]
        else:
            sys.stderr.write("ERROR, NEED VALID FILE\n")
            sys.exit(1)
    else:
        sys.stderr.write("ERROR, NEED FILE\n")
        sys.exit(1)

    # string of streaming words
    keys = ""

    # open file for read
    with open(input_file, "r") as fr:
        for line in fr:
            # empty line
            if line != '\n':
                # remove white chars in start and end of line
                line = line.rstrip('\n\t ')
                line = line.strip('\t ')
                # append line to array
                keys = keys + line + ","

    keys = keys.rstrip(",")

    # check if data folder exists or create it
    if not os.path.isdir("data"):
        os.makedirs("data")

    # keep running stream function (every hour)
    while True:
        # create Twarc class
        t = Twarc(client_key, client_secret, access_token, access_token_secret)

        # call stream function every hour
        if stream(keys, t) != True:
            sys.stderr.write("ERROR, STREAM QUITS\n")
            sys.exit(1)

        # open file for statistics of user tweets
        with open("data/statistics.txt", "w") as fs:
            # write user's id + number of tweets to file
            for key, value in sorted(friends.items(),
                                     key=lambda kv: (kv[1], kv[0]),
                                     reverse=True):
                fs.write(str(key) + " : " + str(value) + "\n")
  def __init__(self, target_list=[], data_dir='', secretfile='/Users/sara/twittersecrets.txt', 
               getimages=True):

    # Set up link to Twitter
    with open(secretfile, 'r') as fsecret: 
      secrets = fsecret.readline()
    access_token, access_token_secret, consumer_key, consumer_secret = \
        [x.strip() for x in secrets.split(',')]
    self.twarc = Twarc(consumer_key, consumer_secret, access_token, access_token_secret)

    self.target_list = target_list
    self.data_dir = data_dir
    self.getimages = getimages
Example #29
def test_max_id():
    t = Twarc()
    for tweet in t.search('obama'):
        id = tweet['id_str']
        break
    assert id
    time.sleep(5)
    count = 0
    for tweet in t.search('obama', max_id=id):
        count += 1
        assert tweet['id_str'] <= id
        if count > 100:
            break
Example #30
def test_hydrate():
    ids = [
        "501064188211765249", "501064196642340864", "501064197632167936",
        "501064196931330049", "501064198005481472", "501064198009655296",
        "501064198059597824", "501064198513000450", "501064180468682752",
        "501064199142117378", "501064171707170816", "501064200186118145",
        "501064200035516416", "501064201041743872", "501064201251880961",
        "501064198973960192", "501064201256071168", "501064202027798529",
        "501064202245521409", "501064201503113216", "501064202363359232",
        "501064202295848960", "501064202380115971", "501064202904403970",
        "501064203135102977", "501064203508412416", "501064203516407810",
        "501064203546148864", "501064203697156096", "501064204191690752",
        "501064204288540672", "501064197396914176", "501064194309906436",
        "501064204989001728", "501064204980592642", "501064204661850113",
        "501064205400039424", "501064205089665024", "501064206666702848",
        "501064207274868736", "501064197686296576", "501064207623000064",
        "501064207824351232", "501064208083980290", "501064208277319680",
        "501064208398573568", "501064202794971136", "501064208789045248",
        "501064209535614976", "501064209551994881", "501064141332029440",
        "501064207387742210", "501064210177331200", "501064210395037696",
        "501064210693230592", "501064210840035329", "501064211855069185",
        "501064192024006657", "501064200316125184", "501064205642903552",
        "501064212547137536", "501064205382848512", "501064213843169280",
        "501064208562135042", "501064214211870720", "501064214467731457",
        "501064215160172545", "501064209648848896", "501064215990648832",
        "501064216241897472", "501064215759568897", "501064211858870273",
        "501064216522932227", "501064216930160640", "501064217667960832",
        "501064211997274114", "501064212303446016", "501064213675012096",
        "501064218343661568", "501064213951823873", "501064219467341824",
        "501064219677044738", "501064210080473088", "501064220415229953",
        "501064220847656960", "501064222340423681", "501064222772445187",
        "501064222923440130", "501064220121632768", "501064222948593664",
        "501064224936714240", "501064225096499201", "501064225142624256",
        "501064225314185216", "501064225926561794", "501064226451259392",
        "501064226816143361", "501064227302674433", "501064227344646144",
        "501064227688558592", "501064228288364546", "501064228627705857",
        "501064229764751360", "501064229915729921", "501064231304065026",
        "501064231366983681", "501064231387947008", "501064231488200704",
        "501064231941570561", "501064232188665856", "501064232449114112",
        "501064232570724352", "501064232700350464", "501064233186893824",
        "501064233438568450", "501064233774510081", "501064235107897344",
        "501064235175399425", "501064235456401410",
    ]
    t = Twarc()
    count = 0
    for tweet in t.hydrate(iter(ids)):
        assert tweet['id_str']
        count += 1
    assert count > 100 # may need to adjust as these might get deleted