Example #1
def activate(args):

    if (datetime.datetime.fromtimestamp(time.time()) -
            datetime.datetime.strptime(args['from_date'],
                                       '%Y-%m-%d')).days < 30:
        print("will use 30-day dev environment")
        premium_search_args = load_credentials(
            "~/.twitter_keys.yaml",
            yaml_key="search_tweets_premium_30day",
            env_overwrite=False)
    else:
        print("will use full-archive dev environment")
        premium_search_args = load_credentials(
            "~/.twitter_keys.yaml",
            yaml_key="search_tweets_premium_fullarchive",
            env_overwrite=False)

    print("query: %s" % (args['query']))
    print("start_date: %s end_date: %s" % (args['from_date'], args['to_date']))
    print("frequency: %d max_results: %d" %
          (args['frequency'], args['max_results']))
    print("file_name from args:", args['filename'])

    test_dates = days_to_collect(args['from_date'], args['to_date'],
                                 args['frequency'])
    print("test dates\n", test_dates)

    user_input = input(
        "press enter to proceed and any other button to cancel: ")

    if user_input != '':
        print("aborting")
        exit(0)

    tweets = []
    for i in range(0, len(test_dates[:-1])):
        # test_dates reversed. Eg. 2018-10-31 -> 2018-10-30
        # collect_tweets requires forward collection: collect_tweets(from, to, max_results=100)
        tweets = np.append(
            tweets,
            collect_tweets(args['query'], test_dates[i], test_dates[i + 1],
                           args['results_per_call'], args['max_results'],
                           premium_search_args))

        # Requests are limited to 30 per minute for sandbox, 60 for subscriptions
        # Requests are limited to 10 per second
        num_calls = (i + 1) * args['max_results'] // args['results_per_call']
        if num_calls % 5 == 0 and num_calls % 20 != 0:
            print("waiting 10 seconds")
            time.sleep(10)

    # flip tweets back so that the rows are in increasing days
    tweets = list(reversed(tweets))

    S2 = to_df(tweets)
    print("collected tweets\n", S2)

    # save file to csv
    S2.to_csv(args['filename'], index=False)
    print('saved file', args['filename'])
Example #2
 def authenticate(self):
     """ authenticate using either the sandbox or premium api with yaml configs from twitter_keys.yaml """
     if self.do_sandbox:
         self.premium_search_args = load_credentials(filename=self.cred_file,
                                                     yaml_key='full_tweets_api_sandbox', env_overwrite=False)
     else:
         self.premium_search_args = load_credentials(filename=self.cred_file,
                                                     yaml_key='search_tweets_api', env_overwrite=False)
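
For reference, load_credentials looks up yaml_key as a top-level section of the credentials file. Below is a minimal sketch of a twitter_keys.yaml holding both sections used above, written the same way Examples #18, #22, and #25 build theirs; the endpoint labels and keys are placeholders, not values from the original project.

import yaml

# Placeholder credentials; substitute your own dev environment labels and app keys.
config = dict(
    full_tweets_api_sandbox=dict(
        account_type='premium',
        endpoint='https://api.twitter.com/1.1/tweets/search/fullarchive/<sandbox-env>.json',
        consumer_key='<consumer-key>',
        consumer_secret='<consumer-secret>'),
    search_tweets_api=dict(
        account_type='premium',
        endpoint='https://api.twitter.com/1.1/tweets/search/fullarchive/<prod-env>.json',
        consumer_key='<consumer-key>',
        consumer_secret='<consumer-secret>'))

with open('twitter_keys.yaml', 'w') as config_file:
    yaml.dump(config, config_file, default_flow_style=False)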
Example #3
def usersTweetsByIds():

    search_args1 = load_credentials(".twitter_keys.yaml",
                                    yaml_key="search_tweets_v2_id",
                                    env_overwrite=False)

    search_args2 = load_credentials(".twitter_keys.yaml",
                                    yaml_key="search_tweets_v2_user",
                                    env_overwrite=False)

    f = open(
        'C:\\Users\\Josh\\Documents\\GitHub\\search-tweets-python\\enUsers_Tweets.json',
        'r',
        encoding='utf-8')

    obj = json.load(f)

    for u in obj['includes']:

        idList = u.get('tweetids')

        ids = ''

        idList = list(set(idList))

        if len(idList) == 0:
            u['tweets'] = []
            continue

        if len(idList) > 99:
            ids = ','.join(idList[0:99])
        else:
            ids = ','.join(idList)

        endTweet = 'https://api.twitter.com/2/tweets'

        query = {"ids": ids, "tweet.fields": "author_id,public_metrics,text"}
        rs = ResultStream(request_parameters=query,
                          endpoint=endTweet,
                          bearer_token=bt)

        tweets = []
        result = list(rs.stream())

        for r in result:

            tweets = r.get('data')

        u['tweets'] = tweets

    with open('Random_WithTweets.json', 'w', encoding='utf-8') as fo:
        json.dump(obj, fo)
Example #4
def main():
    args_dict = vars(parse_cmd_args().parse_args())
    if args_dict.get("debug") is True:
        logger.setLevel(logging.DEBUG)
        logger.debug("command line args dict:")
        logger.debug(json.dumps(args_dict, indent=4))

    if args_dict.get("config_filename") is not None:
        configfile_dict = read_config(args_dict["config_filename"])
    else:
        configfile_dict = {}

    extra_headers_str = args_dict.get("extra_headers")
    if extra_headers_str is not None:
        args_dict['extra_headers_dict'] = json.loads(extra_headers_str)
        del args_dict['extra_headers']

    logger.debug("config file ({}) arguments sans sensitive args:".format(
        args_dict["config_filename"]))
    logger.debug(json.dumps(_filter_sensitive_args(configfile_dict), indent=4))

    creds_dict = load_credentials(filename=args_dict["credential_file"],
                                  account_type=args_dict["account_type"],
                                  yaml_key=args_dict["credential_yaml_key"],
                                  env_overwrite=args_dict["env_overwrite"])

    dict_filter = lambda x: {k: v for k, v in x.items() if v is not None}

    config_dict = merge_dicts(dict_filter(configfile_dict),
                              dict_filter(creds_dict), dict_filter(args_dict))

    logger.debug("combined dict (cli, config, creds) sans password:"******"ERROR: not enough arguments for the program to work")
        sys.exit(1)

    stream_params = gen_params_from_config(config_dict)
    logger.debug(
        "full arguments passed to the ResultStream object sans password")
    logger.debug(json.dumps(_filter_sensitive_args(stream_params), indent=4))

    rs = ResultStream(tweetify=False, **stream_params)

    logger.debug(str(rs))

    if config_dict.get("filename_prefix") is not None:
        stream = write_result_stream(
            rs,
            filename_prefix=config_dict.get("filename_prefix"),
            results_per_file=config_dict.get("results_per_file"))
    else:
        stream = rs.stream()

    for tweet in stream:
        if config_dict["print_stream"] is True:
            print(json.dumps(tweet))
Example #5
def load():
    config = twitter_conifg()
    base_date = datetime.datetime.today()
    date_list = [base_date - datetime.timedelta(days=x) for x in range(5)]
    date_list.reverse()
    all_tweets = []
    for idx, date in enumerate(date_list):
        if idx != 4:
            final_date = date + datetime.timedelta(days=1)
            search_args = load_credentials(
                filename="./configs/twitter_api.yaml",
                yaml_key="search_tweets_v2",
                env_overwrite=False)

            query = gen_request_parameters(
                config['query'],
                results_per_call=100,
                place_fields='country',
                start_time=date.strftime('%Y-%m-%d'),
                end_time=final_date.strftime('%Y-%m-%d'))

            tweets = collect_results(query,
                                     max_tweets=1000,
                                     result_stream_args=search_args)

            def add_date(x):
                x['fecha'] = date.strftime('%Y-%m-%d')

                return x

            tweets = list(map(add_date, tweets))
            all_tweets.append(tweets)

    all_tweets = reduce(lambda x, y: x + y, all_tweets)
    return all_tweets
Example #6
def twitter_auth():
    twitter_credentials_file = os.path.join(os.getcwd(), "Credentials",
                                            "twitter_creds.yaml")
    search_tweets_api = 'search_tweets_30_day_dev'
    return load_credentials(filename=twitter_credentials_file,
                            yaml_key=search_tweets_api,
                            env_overwrite=False)
Example #7
def searchtweets_query(file_name='../../Twitter_cred/full_arch_2007-2020.txt'):

    premium_search_args = load_credentials(
        filename="~/.twitter_keys.yaml",
        yaml_key="search_tweets_fullarchive_dev",
        env_overwrite=False)

    print(premium_search_args)
    print()

    #query = "(nat OR natte OR water OR wateroverlast OR regen OR storm OR blank OR bui OR overstroming OR hoosbui OR schade OR noodweer OR wolkbreuk OR waterschade) has:geo place_country:NL"
    query = "(wateroverlast OR overstroming OR waterschade) has:geo place_country:NL"

    do_query = False

    if do_query:
        from_date = "2007-01-01"
        to_date = "2020-01-01"
        rule = gen_rule_payload(query,
                                results_per_call=500,
                                from_date=from_date,
                                to_date=to_date)
        tweets = collect_results(rule,
                                 max_results=500 * 50,
                                 result_stream_args=premium_search_args
                                 )  # change this if you need to
        for tweet in tweets:
            with open(file_name, 'a') as fp:
                fp.write(json.dumps(tweet) + '\n')
    else:
        print(
            "No query was run. To perform a Twitter query, set do_query to True in Twitter/searchtweets_query.py"
        )
Example #8
def search(queryString, outputpath, api_key_yaml,startTime="2016-01-01",endTime="2021-03-15", lang="en"):

    search_args = load_credentials(api_key_yaml,
                                   yaml_key="search_tweets_v2",
                                   env_overwrite=False)

    print("Should be 1024, but it:")
    print(len(queryString + " -is:nullcast -is:retweet -is:verified -is:quote " + "lang:"+lang))

    #,created_at,geo,in_reply_to_user_id,lang,author_id,conversation_id,public_metrics,entities,context_annotations
    query = gen_request_parameters(
        query=queryString.strip() + " -is:nullcast -is:retweet -is:verified -is:quote " + "lang:" + lang,
        media_fields="media_key,type",
        user_fields="id,description,location,name,entities,url,username,public_metrics,verified,withheld,protected",
        tweet_fields="id,text,created_at,geo,in_reply_to_user_id,lang,author_id,conversation_id,public_metrics,entities,context_annotations,attachments",
        start_time=startTime,
        end_time=endTime,
        stringify=False,
        expansions="author_id,attachments.media_keys",
        results_per_call=500)

    rs = ResultStream(request_parameters=query, max_tweets=sys.maxsize, max_requests=sys.maxsize, **search_args)
    i = 0
    with open(outputpath, 'w') as outputcsv:
        writer = csv.writer(outputcsv)
        writer.writerow(headers)
        for tweet in rs.stream():
            # print(tweet)
            if "id" in tweet:
                writer.writerow(createRow(headers, tweet))
            if "users" in tweet:
                print("parsing users")
                dump_users_info(tweet,outputpath.replace(".csv",str(i) +"-users.csv"))
                i+=1
Example #9
    def arquive_search(self,
                       query,
                       start,
                       end,
                       dev_env,
                       max_size=2500,
                       max_call=100):
        self.settings['search_tweets_api']['endpoint'] =\
           f"https://api.twitter.com/1.1/tweets/search/fullarchive/{dev_env}.json"

        credentials = load_credentials("archive_keys.yaml",
                                       yaml_key="search_tweets_api",
                                       env_overwrite=False)

        with open('archive_keys.yaml', 'w') as config_file:
            yaml.dump(self.settings, config_file, default_flow_style=False)

        q_rule = gen_rule_payload(query,
                                  results_per_call=max_call,
                                  from_date=start,
                                  to_date=end)

        rs = ResultStream(rule_payload=q_rule,
                          max_results=max_size,
                          **credentials)

        with open('tweet_data_archive.csv', 'a', encoding='utf-8') as file:
            n = 0
            for tweet in rs.stream():
                n += 1
                if n % (max_size / 10) == 0:
                    print('{0}: {1}'.format(str(n), tweet['created_at']))
                json.dump(tweet, file)
                file.write('\n')
Example #10
def count_tweets(query,
                 from_date,
                 to_date,
                 credentials_path,
                 yaml_key,
                 count_bucket="day",
                 results_per_call=500,
                 verbose=False,
                 **kwargs):
    """
    Returns the number of existing Tweets for a given query and time
    frame. Since this function doesn't pull tweets, this is a safe option
    to check the effectiveness of your filters without exhausting the
    API's capacity.

    Parameters
    ----------
    query : str
        Query passed to the Twitter API to fetch Tweets.
    from_date : str or None
        Date format as specified by `convert_utc_time` for the starting time
        of your search.
    to_date : str or None
        Date format as specified by `convert_utc_time` for the end time of
        your search.
    credentials_path : str
        Path for the yaml file with the Twitter API credentials.
    yaml_key : str
        Key within the yaml file containing the Twitter API credentials to be
        used.
    count_bucket : str or None, default="day"
        If using the counts api endpoint, will define the count bucket for
        which tweets are aggregated.
    results_per_call : int, default=500
        Number of Tweets returned per call.
    verbose : int or bool, default=False
        Controls the verbosity when pulling the tweet count.

    Returns
    -------
    counts : dict
        Number of existing tweets for each bucket.
    """

    logger = logging.getLogger(__name__)
    logger.propagate = verbose
    logger.info('Counting Tweets')

    search_args = load_credentials(credentials_path, yaml_key=yaml_key)

    count_rule = gen_rule_payload(query,
                                  from_date=from_date,
                                  to_date=to_date,
                                  count_bucket=count_bucket,
                                  results_per_call=results_per_call)

    counts = collect_results(count_rule, result_stream_args=search_args)

    return counts
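
A hedged usage sketch for count_tweets; the query, dates, credentials path, and yaml key below are placeholders rather than values from the original project.

# Hypothetical call: count matching Tweets per day without pulling any Tweet payloads.
counts = count_tweets(query="(snow OR hail) place_country:NL",
                      from_date="2019-01-01",
                      to_date="2019-02-01",
                      credentials_path="~/.twitter_keys.yaml",
                      yaml_key="search_tweets_fullarchive_dev",
                      count_bucket="day")
# On the premium counts endpoint each bucket is a dict such as
# {"timePeriod": "201901010000", "count": 42}.
for bucket in counts:
    print(bucket["timePeriod"], bucket["count"])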
Example #11
 def __init__(self, cred_file, yaml_key):
     """
     Initialize an object with loading the credentials
     using a credentials file and yaml key
     """
     self.premium_search_args = load_credentials(cred_file,
                                                 yaml_key=yaml_key,
                                                 env_overwrite=False)
Example #12
def use_premium(search, filename, from_date, to_date, endpoint='full'):
    '''
    Collect historical tweets
    '''
    if endpoint == '30day':
        endpoint_key = 'search_premium_30day_api'
        #endpoint_key = 'search_lynxx_30day_api'
    else:
        endpoint_key = 'search_premium_full_api'
        #endpoint_key = 'search_lynxx_full_api'

    try:
        tweet_df = pd.read_csv(filename, dtype=str, encoding='ISO-8859-1')
    except FileNotFoundError:
        tweet_df = pd.DataFrame()

    # Extract the credentials for the endpoint.
    search_stream = load_credentials(filename='./credentials.yaml',
                                     yaml_key=endpoint_key,
                                     env_overwrite=False)

    # Collect tweets while we are permitted.
    # TODO: still don't know how to catch the retry-limit error.
    while to_date > from_date:

        rule = gen_rule_payload(search,
                                from_date=from_date,
                                to_date=to_date,
                                results_per_call=100)
        try:
            tweets = collect_results(rule,
                                     max_results=2000,
                                     result_stream_args=search_stream)
        except Exception:
            break

        for idx, tweet in enumerate(tweets):
            tweet_df = tweet_df.append([json_normalize(tweet)],
                                       ignore_index=True,
                                       sort=False)

            if idx % 1000 == 0:
                print(f'{tweet["created_at"]}: {tweet["text"]}')
                tweet_df.to_csv(filename, index=False)

        tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'],
                                                utc=True)
        mindate = min(tweet_df['created_at']).date() - timedelta(hours=1)
        to_date = mindate.strftime('%Y-%m-%d %H:%M')

    tweet_df['created_at'] = pd.to_datetime(tweet_df['created_at'])
    min(tweet_df['created_at'])

    tweet_df.drop_duplicates(subset=['created_at', 'user.screen_name'],
                             keep='first',
                             inplace=True)
    tweet_df.sort_values(by='created_at', inplace=True)
    tweet_df.to_csv(filename, index=False)
Example #13
 def authenticate(self):
     self.premium_search_args = searchtweets.load_credentials(
         "{0}/.twitter_keys.yaml".format(ROOT_DIR),
         yaml_key="search_tweets_30_day",
         env_overwrite=False)
     auth = tweepy.OAuthHandler(self.twitter_api_key,
                                self.twitter_api_secret)
     auth.set_access_token(self.twitter_access_token,
                           self.twitter_access_token_secret)
     self.api = tweepy.API(auth)
Example #14
def coll_cantera_neg():
    premium_search_args = load_credentials(filename="./twitter_keys.yaml",
                                           yaml_key="search_tweets_30_day_dev",
                                           env_overwrite=False)
    bcp = search_lima('(bcp OR BCPComunica)', premium_search_args)
    bbva = search_lima('(bbva)', premium_search_args)
    interbank = search_lima('(interbank)', premium_search_args)
    yape = search_lima('(yape)', premium_search_args)
    scotia = search_lima('(scotiabank)', premium_search_args)
    all_search = bcp + bbva + interbank + yape + scotia
    return all_search
Example #15
def fetch_and_parse_tweets():
    search_args = searchtweets.load_credentials()
    tweets = searchtweets.collect_results(
        {"query": "from:hypertextadrien"},
        max_results=100,
        result_stream_args=search_args
    )
    parsed_tweets = [parse_tweet(status.all_text) for status in tweets]
    parsed_tweets = [t for t in parsed_tweets if t is not None]
    print("%s tweets were fetched." % len(parsed_tweets))
    pickle.dump(parsed_tweets, open(CACHE_TWEETS_FILE_PATH, "wb"))
    return parsed_tweets
Example #16
 def __init__(self, search_query):
     print(self.__class__.__name__)
     self.premium_search_args = searchtweets.load_credentials()
     self.rule = searchtweets.gen_rule_payload(
         search_query.query,
         to_date=(datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d'))
     try:
         self.iter = iter(
             searchtweets.collect_results(
                 self.rule, result_stream_args=self.premium_search_args))
     except Exception:
         self.iter = iter([])
Example #17
 def __init__(self):
     __dir_path = os.path.dirname(os.path.realpath(__file__))
     credentials = get_credidentials()
     self.twitter_premium_api = load_credentials(
         filename="{}/{}".format(__dir_path, "twitter_keys.yaml"),
         yaml_key="search_tweets_api_30day")
     self.twitter_api = Twitter(auth=OAuth(
         consumer_key=credentials['twitter']['consumer_key'],
         consumer_secret=credentials['twitter']['consumer_secret'],
         token=credentials['twitter']['access_token_key'],
         token_secret=credentials['twitter']['access_token_secret']))
     self.yelp_api = YelpAPI(credentials['yelp']['api_key'])
     self.__data_path = "../data/raw"
     logger.info("initiation started.")
Example #18
def get_file(aname,
             cak,
             cask,
             etype,
             hashtag,
             keywords,
             fdate='00-00-0000',
             tdate='00-00-0000',
             ftime='00:00',
             ttime='00:00'):

    if etype == 'efa':  # Full archive scraping (refer to limits on README)
        endp = 'https://api.twitter.com/1.1/tweets/search/fullarchive/' + aname + '.json'
    elif etype == 'tdays':  # 30 days scraping (refer to limits on README)
        endp = 'https://api.twitter.com/1.1/tweets/search/30day/' + aname + '.json'
    else:
        endp = 'ERROR'

    # Creating a yaml credentials file
    config = dict(search_tweets_api=dict(account_type='premium',
                                         endpoint=endp,
                                         consumer_key=cak,
                                         consumer_secret=cask))

    with open('C:\\Users\\Samuktha\\Documents\\USC\\twitter\\proj\\cred.yaml',
              'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    # loading credentials
    premium_search_args = load_credentials(
        'C:\\Users\\Samuktha\\Documents\\USC\\twitter\\proj\\cred.yaml',
        yaml_key='search_tweets_api',
        env_overwrite=True)
    print(premium_search_args)

    if etype == 'efa':
        rule = gen_rule_payload(
            results_per_call=100,
            from_date=fdate + ' ' + ftime,  #"2019-07-06 01:00",
            to_date=tdate + ' ' + ttime,  #"2019-07-06 02:15",
            pt_rule=keywords,
        )
    else:
        rule = gen_rule_payload(results_per_call=100, pt_rule=keywords)

    # result stream

    rs = ResultStream(rule_payload=rule, max_results=50, **premium_search_args)

    return rs
Example #19
 def twitter_login(self, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET):
     '''
     :param ACCESS_TOKEN:
     :param ACCESS_TOKEN_SECRET:
     :param CONSUMER_KEY:
     :param CONSUMER_SECRET:
     :return:
     '''
     # Create login for search by users and search by words
     self._auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
     self._auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
     # Create credential variable for the historical API search
     self._premium_search_args = load_credentials(r"data/login.yaml",
                                            yaml_key="search_tweets_premium", env_overwrite=False)
Example #20
def main():
    parser = parse_cmd_args()
    args_dict = vars(parser.parse_args())
    if args_dict.get("debug") is True:
        logger.setLevel(logging.DEBUG)
        logger.debug(json.dumps(args_dict, indent=4))

    if args_dict.get("config_filename") is not None:
        configfile_dict = read_config(args_dict["config_filename"])
    else:
        configfile_dict = {}

    creds_dict = load_credentials(filename=args_dict["credential_file"],
                                  account_type=args_dict["account_type"],
                                  yaml_key=args_dict["credential_yaml_key"],
                                  env_overwrite=args_dict["env_overwrite"])

    dict_filter = lambda x: {k: v for k, v in x.items() if v is not None}

    config_dict = merge_dicts(dict_filter(configfile_dict),
                              dict_filter(args_dict), dict_filter(creds_dict))

    logger.debug(json.dumps(config_dict, indent=4))

    if len(dict_filter(config_dict).keys()
           & REQUIRED_KEYS) < len(REQUIRED_KEYS):
        print(REQUIRED_KEYS - dict_filter(config_dict).keys())
        logger.error("ERROR: not enough arguments for the program to work")
        sys.exit(1)

    stream_params = gen_params_from_config(config_dict)

    logger.debug(json.dumps(config_dict, indent=4))

    rs = ResultStream(tweetify=False, **stream_params)

    logger.debug(str(rs))

    if config_dict.get("filename_prefix") is not None:
        stream = write_result_stream(
            rs,
            filename_prefix=config_dict["filename_prefix"],
            results_per_file=config_dict["results_per_file"])
    else:
        stream = rs.stream()

    for tweet in stream:
        if config_dict["print_stream"] is True:
            print(json.dumps(tweet))
Example #21
def getPremiumEndpointCreds(endpointType):
    """
    fetches credentials for some premium endpoint using an api key and secret
    which are already in the system's environment variables
    :parameter endpointType which premium endpoint to get the credentials for (30 day or full archive)
    :return credentials for some premium endpoint
    """
    os.environ[ENDPOINT_ENV_VAR] = endpointType
    searchArgs = st.load_credentials(filename="NoCredsFile.yaml",
                                     account_type="premium",
                                     yaml_key="dummyYamlKey")
    # cleaning up this temporary environment variable to avoid causing a side effect
    del os.environ[ENDPOINT_ENV_VAR]

    return searchArgs
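
A usage sketch for the helper above. ENDPOINT_ENV_VAR is a module-level constant not shown in the snippet; the assumption here is that it names the environment variable searchtweets reads the endpoint from (typically SEARCHTWEETS_ENDPOINT), so endpointType would be a full endpoint URL.

# Hypothetical endpoint URLs; replace <dev-env> with your own dev environment label.
thirty_day_args = getPremiumEndpointCreds(
    "https://api.twitter.com/1.1/tweets/search/30day/<dev-env>.json")
full_archive_args = getPremiumEndpointCreds(
    "https://api.twitter.com/1.1/tweets/search/fullarchive/<dev-env>.json")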
Example #22
def set_creds():
    config = dict(search_tweets_api=dict(
        account_type='premium',
        endpoint=
        'https://api.twitter.com/1.1/tweets/search/fullarchive/<environment-label>.json',
        consumer_key='Add your consumer key',
        consumer_secret='Add your consumer secret'))

    with open('credentials/api-credentials.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    premium_search_args = load_credentials("api-credentials.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)
    print(premium_search_args)
    return premium_search_args
Example #23
def auth(dates):
    premium_args = load_credentials(filename="credentials.yaml",
                                    yaml_key='search_tweets_api_dev',
                                    env_overwrite=False)
    # Change the below string to the candidate you're looking for info on. Don't remove the lang:en otherwise you'll
    # get results in any language
    queryString = 'Donald Trump lang:en'
    rule = gen_rule_payload(queryString,
                            results_per_call=100,
                            from_date=dates[0],
                            to_date=dates[1])
    print(rule)
    tweets = collect_results(rule,
                             max_results=100,
                             result_stream_args=premium_args)
    [print(tweet.all_text) for tweet in tweets]
    return tweets, queryString
Example #24
    def __init__(self,
                 topic: str,
                 path_to_keys: str = './keys/twitter_keys.yaml'):
        # set up access to API
        self.premium_search_args = load_credentials(
            path_to_keys,
            yaml_key="search_tweets_premium",
            env_overwrite=False)
        self.topic = topic

        # open topic_tweets.csv file that we will be modifying
        try:
            self.tweets_df = pd.read_csv("{}_tweets.csv".format(self.topic),
                                         index_col='id')

        except FileNotFoundError:
            self.tweets_df = pd.DataFrame()
Example #25
def get_data(search_query, api_key, secret_key, to_date, from_date, filename):
    """ get twitter data through twitter API from full archive search sand box and return all twitters in JSONL file
    based on 
     search term, 
     the geographic location of interest
     the time period of interest.
     and personal twitter account information.

     Reference: https://github.com/geduldig/TwitterAPI/tree/master/TwitterAPI
     Reference: https://developer.twitter.com/en/docs/tweets/search/overview
    """
    print_after_x = 1000
    config = dict(
        search_tweets_api=dict(
            account_type='premium',
            endpoint=f"https://api.twitter.com/1.1/tweets/search/{'fullarchive'}/{'mangroveConservation'}.json",
            consumer_key=api_key,
            consumer_secret=secret_key
        )
    )
    with open('twitter_keys.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)
    from searchtweets import load_credentials, gen_rule_payload, ResultStream

    premium_search_args = load_credentials("twitter_keys.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)
    rule = gen_rule_payload(search_query,
                            results_per_call=100,
                            from_date=from_date,
                            to_date=to_date
                            )
    temp = ResultStream(rule_payload=rule,
                      max_results=100000,
                      **premium_search_args)
    with open(filename, 'a', encoding='utf-8') as temp_file:
        num = 0
        for tweet in temp.stream():
            num += 1
            if num % print_after_x == 0:
                print('{0}: {1}'.format(str(num), tweet['created_at']))
            json.dump(tweet, temp_file)
            temp_file.write('\n')
    print('done')
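
A usage sketch for get_data; every argument value below is a placeholder (note that the endpoint inside the function is hard-coded to the 'mangroveConservation' full-archive dev environment).

# Hypothetical call; supply your own consumer key/secret, query, and date range.
get_data(search_query="mangrove conservation has:geo",
         api_key="<consumer-key>",
         secret_key="<consumer-secret>",
         from_date="2019-01-01",
         to_date="2019-06-30",
         filename="mangrove_tweets.jsonl")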
Example #26
def save_old_tweets():
    from searchtweets import load_credentials, gen_rule_payload, ResultStream
    import json

    premium_search_args = load_credentials("twitter_keys_fullarchive.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)

    query = "from:NTOO_Org"
    rule = gen_rule_payload(query, results_per_call=100)

    rs = ResultStream(rule_payload=rule,
                      max_results=1000,
                      **premium_search_args)

    with open('fullTweetsData.json', 'a', encoding='utf-8') as f:
        for tweet in rs.stream():
            json.dump(tweet, f)
            f.write('\n')
Example #27
def read_stream(apiscope, label):
    API_KEY = api_key
    API_SECRET_KEY = api_secret_key
    DEV_ENVIRONMENT_LABEL = label
    API_SCOPE = apiscope  # 'fullarchive'  # 'fullarchive' for full archive, '30day' for last 31 days

    SEARCH_QUERY = 'delays, @WestMidRailway OR @NetworkRailBHM OR @networkrail'
    RESULTS_PER_CALL = 100  # 100 for sandbox, 500 for paid tiers
    TO_DATE = '2021-01-30'  # format YYYY-MM-DD HH:MM (hour and minutes optional)
    FROM_DATE = '2021-01-01'  # format YYYY-MM-DD HH:MM (hour and minutes optional)

    MAX_RESULTS = 10000  # Number of Tweets you want to collect

    # --------------------------- STOP -------------------------------#
    # Don't edit anything below, if you don't know what you are doing.
    # --------------------------- STOP -------------------------------#

    config = dict(search_tweets_api=dict(
        account_type='premium',
        endpoint=
        f"https://api.twitter.com/1.1/tweets/search/{API_SCOPE}/{DEV_ENVIRONMENT_LABEL}.json",
        consumer_key=API_KEY,
        consumer_secret=API_SECRET_KEY))

    with open('twitter_keys.yaml', 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)

    premium_search_args = load_credentials("twitter_keys.yaml",
                                           yaml_key="search_tweets_api",
                                           env_overwrite=False)

    rule = gen_rule_payload(SEARCH_QUERY,
                            results_per_call=RESULTS_PER_CALL,
                            from_date=FROM_DATE,
                            to_date=TO_DATE)

    rs = ResultStream(rule_payload=rule,
                      max_results=MAX_RESULTS,
                      **premium_search_args)

    return rs
Example #28
def read_user_timeline(name='',
                       from_date=pd.to_datetime('2020-1-1'),
                       to_date=pd.to_datetime('2020-9-1'),
                       method='tweepy'):

    if method == 'fullsearch':
        premium_search_args = load_credentials(".twitter_keys.yaml",
                                               account_type="premium",
                                               env_overwrite=False)
        rule = gen_rule_payload(
            "from:" + name,
            from_date=str(
                from_date.strftime('%Y-%m-%d')),  #UTC 2017-09-01 00:00
            to_date=str(to_date.strftime('%Y-%m-%d')),
            results_per_call=100)
        tweets = collect_results(rule,
                                 max_results=100,
                                 result_stream_args=premium_search_args
                                 )  # change this if you need to

    elif method == 'tweepy':
        creds = json.load(open("twitter_credentials.json", "r"))
        api = Twitter(
            auth=OAuth(creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'],
                       creds['CONSUMER_KEY'], creds['CONSUMER_SECRET']))
        n = 10
        for i in range(n):
            if (i == 0):
                tweets = api.statuses.user_timeline(screen_name="@" + name,
                                                    count=400)
                last_id = tweets[-1]['id']
            else:
                t = api.statuses.user_timeline(screen_name="@" + name,
                                               count=400,
                                               max_id=last_id)
                last_id = t[-1]['id']
                tweets.extend(t)

        pickle.dump(tweets, open(name + 'tweets.sav', 'wb'))

    return tweets
Example #29
def searchTweetsAndWriteToFile(search_term, file_name, lang):
    if (not isinstance(lang, LanguageEnum)):
        raise TypeError("lang must be LanguageEnum instance")

    if not search_term:
        return ""
    premium_search_args = load_credentials("~/.twitter_keys.yaml",
                                           yaml_key="search_tweets_30_day_dev",
                                           env_overwrite=False)
    # testing with a sandbox account
    rule = gen_rule_payload(search_term + " lang:" + lang.value,
                            results_per_call=100)
    print(rule)
    tweets = collect_results(rule,
                             max_results=200,
                             result_stream_args=premium_search_args)
    with open(file_name, "w") as fp:
        for tweet in tweets:
            json.dump(tweet, fp)
            fp.write("\n")
Example #30
def loadTweets (user):
	print('Loading tweets from ' + user + '...')
	search_args = st.load_credentials("twitter_keys.yaml",
									yaml_key="search_tweets_api",
									env_overwrite=False)

	rule = st.gen_rule_payload("from:"+user, 
							results_per_call=100,
							from_date="2020-04-18",
							to_date="2020-05-18"
							)

	rs = st.ResultStream(rule_payload=rule,
						max_results=100,
						**search_args)

	results = list(rs.stream())
	with open(user+'Twts.jsonl', 'w', encoding='utf-8') as f:
		for tweet in results:
			json.dump(tweet, f)
			f.write('\n')
	print('done - ' + str(len(results)) + " tweets saved")
Example #31
def getRecentTweets():
    endRecent = 'https://api.twitter.com/2/tweets/search/recent'

    search_args_rec = load_credentials(".twitter_keys.yaml",
                                       yaml_key="search_tweets_v2_recent",
                                       env_overwrite=False)

    query = {
        "max_results": 100,
        "tweet.fields": "public_metrics,author_id,lang",
        "query":
        "happy -RT OR upset -RT OR lol -RT OR ugh -RT OR dog -RT OR cat -RT OR food -RT OR sucks -RT",
        "expansions": "author_id",
        "user.fields": "public_metrics"
    }

    rs = ResultStream(
        request_parameters=query,
        endpoint=endRecent,
        bearer_token=bt,
        max_tweets=100,
        max_requests=1,
    )
    result = list(rs.stream())

    obj = {}

    obj['data'] = []
    obj['includes'] = []

    for r in result:
        obj['data'] = obj['data'] + r.get('data')
        obj['includes'] = obj['includes'] + r.get('includes').get('users')

    with open('testJson.json', 'w') as out:
        json.dump(obj, out)