Example #1
from typing import Dict, List

from tweepy.models import Status
from twitter_fire_scraper.twitter import GEOBOX_CHICAGO
from twitter_fire_scraper.util import geobox_to_geocode

# CachedTweets is defined elsewhere in the module (not shown in this excerpt).
def tweets_large_geo():
    # type: () -> Dict[str, List[Status]]
    """Return a static dict of ~300 tweets (100 per term) geotagged within
    20 miles of Chicago's center, generated once and re-used throughout the
    module's lifetime."""
    return CachedTweets.scraper.scrape_terms(
        {"flood", "fire", "house fire"},
        count=100,
        geocode=geobox_to_geocode(GEOBOX_CHICAGO, "20mi"))
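Note that the function body alone would re-scrape on every call; the "generated once, re-used" behavior the docstring describes presumably lives in the CachedTweets class, which this excerpt does not show. A minimal sketch of one way to get that behavior with a module-level cache (the wrapper name and cache variable below are assumptions, not the project's code):

_TWEETS_LARGE_GEO_CACHE = None  # hypothetical module-level cache

def tweets_large_geo_cached():
    # type: () -> Dict[str, List[Status]]
    """Scrape once, then return the same results for the module's lifetime."""
    global _TWEETS_LARGE_GEO_CACHE
    if _TWEETS_LARGE_GEO_CACHE is None:
        _TWEETS_LARGE_GEO_CACHE = tweets_large_geo()
    return _TWEETS_LARGE_GEO_CACHE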
Example #2

import os

import tweepy

from twitter_fire_scraper.twitter import GEOBOX_CHICAGO
from twitter_fire_scraper.util import geobox_to_geocode

# TwitterAuthentication, DataConfig, get_fire_search_terms and the argparse
# result `args` are defined elsewhere in this module (not shown in this
# excerpt).

# Maximum number of tweets to search for.
MAX_TWEETS = args.maxtweets if args.maxtweets else 10

# Radius to search for tweets in.
TWEET_RADIUS = "50mi"

# Set up Twitter auth.
twauth = TwitterAuthentication.autodetect_twitter_auth()

api = tweepy.API(twauth.oauth_handler,
                 wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)

# Get a Twitter geocode (a "lat,long,radius" string) from Chicago's geobox.
geocode = geobox_to_geocode(GEOBOX_CHICAGO, TWEET_RADIUS)

# List of search terms to search for.
search_terms = get_fire_search_terms()

print(
    "Scraping the top {n} tweets that reference search terms from our "
    "'{path}' datafile inside {geocode}"
    .format(n=MAX_TWEETS,
            path=os.path.basename(DataConfig.FIRE_HASHTAGS_DATA_PATH),
            geocode=geocode))

# All tweets, keyed by search term.
all_tweets = {}

# For each search term,
for search_term in search_terms:  # type: str
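    # NOTE: the excerpt ends at the loop header above; the body below is an
    # assumed completion, not the project's confirmed code. It searches for
    # the term inside the geocode using tweepy 3.x's api.search endpoint.
    statuses = tweepy.Cursor(api.search,
                             q=search_term,
                             geocode=geocode).items(MAX_TWEETS)

    # Collect this term's results, keyed by the term that found them.
    all_tweets[search_term] = list(statuses)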
Example #3
from pprint import pprint

from twitter_fire_scraper.scraper import Scraper
from twitter_fire_scraper.twitter import GEOBOX_CHICAGO
from twitter_fire_scraper.util import geobox_to_geocode, flatten_status_dict

if __name__ == '__main__':
    scraper = Scraper()

    terms = {"#pizza", "pizza", "cat", "#cat"}

    results = scraper.scrape_terms(terms=terms, count=3)
    results = flatten_status_dict(results)

    print("{} on all of twitter:".format(", ".join(terms)))
    pprint(results)

    results = scraper.scrape_terms(terms=terms,
                                   count=3,
                                   geocode=geobox_to_geocode(GEOBOX_CHICAGO, "20mi"))
    results = flatten_status_dict(results)
    print("{} in Chicago:".format(", ".join(terms)))
    pprint(results)

    # scraper = Scraper()
    #
    # results = scraper.scrape_accounts(accounts=['MABASIllinois','NWSChicago'])
    #
    # pprint(results)
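
The commented-out block above hints at account-based scraping. Assuming scrape_accounts returns the same Dict[str, List[Status]] shape as scrape_terms, keyed by account name rather than search term (this excerpt does not confirm that), it composes with flatten_status_dict the same way:

from pprint import pprint

from twitter_fire_scraper.scraper import Scraper
from twitter_fire_scraper.util import flatten_status_dict

if __name__ == '__main__':
    scraper = Scraper()

    # Assumed return shape: Dict[str, List[Status]] keyed by account name,
    # mirroring scrape_terms (not confirmed by this excerpt).
    results = scraper.scrape_accounts(accounts=['MABASIllinois', 'NWSChicago'])
    results = flatten_status_dict(results)

    print("Tweets from MABASIllinois and NWSChicago:")
    pprint(results)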