Пример #1
0
    def on_status(self, status):
        # type: (SimpleFireStreamListener, Status) -> None
        """
        Handle one incoming status: print its text if it is relevant.

        Relevance is decided by SimpleFireStreamListener.is_relevant, which is
        called with verbose=True. Nothing is printed here for statuses that
        are not relevant.

        :param status: The status delivered by the stream.
        """
        if not SimpleFireStreamListener.is_relevant(status, verbose=True):
            return

        # NOTE(review): concatenating a str with the bytes returned by
        # .encode() only works on Python 2; on Python 3 it raises TypeError.
        # Confirm which interpreter this listener targets.
        encoded = get_status_text(status).encode("UTF-8")
        print("Relevant: " + encoded)
Пример #2
0
    def on_status(self, status):
        # type: (MongoDBStreamListener, Status) -> None
        """
        Handle one incoming status: store it if relevant, and log either way.

        Relevant statuses have their raw ``_json`` payload inserted into the
        ``self.TWEETS_TABLE`` collection of ``self.mongodatabase`` and are
        printed with a "Hit: " prefix; all others are printed with "Ign: ".

        :param status: The status delivered by the stream.
        """
        # Encoded up front so both branches print the same console-safe text.
        # NOTE(review): str + bytes concatenation below is Python 2 only; on
        # Python 3 it raises TypeError. Confirm the target interpreter.
        printable = get_status_text(status).encode('UTF-8')

        if not MongoDBStreamListener.is_relevant(status):
            print("Ign: " + printable)
            return

        # Relevant: persist the raw payload before reporting the hit.
        collection = self.mongodatabase[self.TWEETS_TABLE]
        collection.insert_one(status._json)
        print("Hit: " + printable)
Пример #3
0
    def is_relevant(status, verbose=False):
        # type: (Status, bool) -> bool
        """
        Decide whether a Status counts as relevant.

        A status is relevant when the literal substring 'fire' occurs in the
        text returned by get_status_text. This is a plain ``in`` test, so for
        string text the match is case-sensitive.

        :param status: The status to inspect.
        :param verbose: Not read by this implementation.
        :return: True if the text contains 'fire', otherwise False.
        """
        return 'fire' in get_status_text(status)
Пример #4
0
    def is_relevant(status, verbose=False):
        # type: (Status, bool) -> bool
        """
        Decide whether a Status counts as relevant, reporting to stdout.

        A status is relevant when the literal substring 'fire' occurs in the
        text returned by get_status_text. A match always prints "Relevant:";
        a miss prints the status text only when ``verbose`` is true.

        :param status: The status to inspect.
        :param verbose: If true, print the text of statuses that do not match.
        :return: True if the text contains 'fire', otherwise False.
        """
        body = get_status_text(status)
        matched = 'fire' in body

        if matched:
            print("Relevant:")
        elif verbose:
            # Show a snapshot of the tweet that was rejected.
            print("Not relevant: {}".format(body))

        return matched
Пример #5
0
"""
Demonstrates the ability to search for fire regardless of location.
"""
# noinspection PyUnresolvedReferences
import __init__

import tweepy

from pprint import pprint

from twitter_fire_scraper.twitter import TwitterAuthentication
from twitter_fire_scraper.util import get_status_text

# Script entry point: authenticate against Twitter, run a plain "fire" search
# and print the text of each result.
# NOTE(review): from the "Retrieve however many tweets" comment onward this
# excerpt uses names that are not defined anywhere in the visible code
# (all_tweets, search_term, cursor, MAX_TWEETS, search_terms, status_to_url)
# and it ends on a `for` statement with no body. It looks like two separate
# snippets were merged and then truncated -- confirm against the real file.
if __name__ == "__main__":
    # Set up twitter auth.
    twauth = TwitterAuthentication.autodetect_twitter_auth()
    api = tweepy.API(twauth.oauth_handler)

    print(
        "Just searching for 'fire'... Probably not going to get us Chicago fire incidents."
    )

    for status in api.search("fire"):
        print(get_status_text(status))
        # Retrieve however many tweets we want, and store that in our dictionary.
        # NOTE(review): the comprehension below reuses the name `status`; on
        # Python 2 a list comprehension rebinds the enclosing loop variable.
        all_tweets[search_term] = [
            status for status in cursor.items(MAX_TWEETS)
        ]

        print("{n} hits for {ht}:".format(n=len(all_tweets[search_term]),
                                          ht=search_term))

        # For all statuses that we retrieved,
        # (only the first five are printed; the inner loop also rebinds `status`)
        for status in all_tweets[search_term][0:5]:  # type: Status
            # NOTE(review): `print(...),` with a trailing comma and the bare
            # `print` below are Python 2 idioms (suppress newline / emit a
            # blank line). On Python 3 they build a tuple / do nothing.
            print(" " * 4),  # Prefix with four spaces to show hierarchy.
            print("<{}> :".format(status_to_url(status)))  # URL of tweet.

            print(" " * 4),  # Status belonging to URL.
            print(get_status_text(status).encode("UTF-8"))  # Status text.
            print

    print("Final results of scraping {n} tweets each from these search terms:".
          format(n=MAX_TWEETS))

    print(", ".join(search_terms))

    # Determine unique statuses found from all keywords

    # Unique statuses. Sets can contain no duplicate elements.
    unique_status_ids = set()  # type: set[long]
    total_statuses = 0  # Total amount of statuses found.
    # NOTE(review): the body of the inner loop is missing from this excerpt.
    for keyword, statuses in all_tweets.items():

        for status in statuses:
Пример #7
0
    def save_statusdict_to_csv(self, statusdict, filepath, overwrite=False):
        # type: (Scraper, Dict[str, List[Status]], str, bool) -> str
        """Save a status dict to a CSV file.

        One row is written per status, tagged with the dictionary key it was
        stored under. Geo, coordinates and place columns are filled in only
        when the status carries that information; otherwise they are left
        empty. The file is written as UTF-16 with every field quoted.

        :param statusdict: A {str: [Status, Status, ...]} dictionary.
        :param filepath: A path to the file to output to. A bare filename is
            written to the current working directory.
        :param overwrite: If False (the default), refuse to replace an
            existing file at ``filepath``.
        :return: ``filepath``, unchanged.
        :raises NotADirectoryError: If the target directory does not exist.
        :raises FileExistsError: If the file exists and ``overwrite`` is False.
        """

        # os.path.dirname() returns '' for a bare filename, and
        # os.path.exists('') is False; treat that case as the current directory
        # instead of rejecting it.
        dirname = os.path.dirname(filepath) or os.curdir

        if not os.path.exists(dirname):
            raise NotADirectoryError("Directory {} does not exist!".format(dirname))

        if os.path.isfile(filepath) and not overwrite:
            raise FileExistsError("File at '{}' already exists!".format(filepath))

        fieldnames = [
            'category', 'tweet_id', 'text', 'date', 'retweet_count',

            'geo',

            'coordinates',

            'place_id', 'place_centroid', 'place_country', 'place_country_code', 'place_full_name',
            'place_name', 'place_type',
        ]

        # Centroids need an API round trip; remember them per place id so a
        # place shared by many statuses is only looked up once.
        centroid_by_place_id = {}

        with open(filepath, 'w', encoding='utf-16', newline='') as file:
            fileWriter = csv.DictWriter(file, delimiter=Scraper.CSV_DELIMITER, quotechar='"', quoting=csv.QUOTE_ALL,
                                        fieldnames=fieldnames)

            fileWriter.writeheader()

            for keyword, statuses in statusdict.items():

                for status in statuses:  # type: Status

                    data = {
                        "category": keyword,
                        "tweet_id": status.id,
                        "text": get_status_text(status),
                        "date": status.created_at,
                        'retweet_count': status.retweet_count,
                    }

                    if status.geo:
                        data["geo"] = ','.join(str(x) for x in status.geo['coordinates'])

                    if status.coordinates:
                        data['coordinates'] = ','.join(str(x) for x in status.coordinates['coordinates'])

                    # If the status has a place, then add its data!
                    place = status.place
                    if place:
                        if place.id not in centroid_by_place_id:
                            # We have to use the API for this one to look it up.
                            centroid_by_place_id[place.id] = ','.join(
                                str(x) for x in self.api.geo_id(place.id).centroid)

                        data.update({
                            'place_id': place.id,
                            'place_centroid': centroid_by_place_id[place.id],
                            'place_country': place.country,
                            'place_country_code': place.country_code,
                            'place_full_name': place.full_name,
                            'place_name': place.name,
                            'place_type': place.place_type,
                        })

                    # Write all the data we've collected so far.
                    fileWriter.writerow(data)

        return filepath