Example #1
def main():
	username = '******'
	quotes = 'quotes.txt'
	#Opens quotes file for quotes of anguish (one quote per line)
	with open(quotes) as f:
		quotes = [line.strip() for line in f if line != "\n"]

	pprint(quotes)

	auth = OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET)
	t = Twitter(auth=auth)
	ts = TwitterStream(domain='userstream.twitter.com', auth=auth)

	stream = ts.user()

	for tweet in stream:

		#pprint(tweet)

		if 'event' in tweet:
			print('received event %s' % tweet['event'])

		elif 'hangup' in tweet:
			return

		elif 'text' in tweet and tweet['user']['screen_name'] != username:
			print('from @%s: %s' % (tweet['user']['screen_name'], tweet['text']))
			line = random.choice(quotes)
			print('responding with line: %s' % line)
			reply = '@' + tweet['user']['screen_name'] + ' ' + line
			t.statuses.update(status=reply, in_reply_to_status_id=tweet['id'])
Example #2
def get_twitter_stream():
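    # Build an OAuth-authenticated user-stream connection and return its message iterator.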
    stream = TwitterStream(
        domain="userstream.twitter.com",
        api_version="1.1",
        auth=OAuth(**TWITTER),
    )
    return stream.user()
Example #3
def _follow_userstream(bot):
    o = service.config_for(bot).oauth._fields
    stream = TwitterStream(auth=twitter.OAuth(**o), domain="userstream.twitter.com", block=False)

    for msg in stream.user():
        if msg is not None:
            service.logger.debug(str(msg))

            # Twitter signals start of stream with the "friends" message.
            if 'friends' in msg:
                _announce(bot, "\x02twitter:\x02 This channel is now streaming Twitter in real-time.")
            elif 'text' in msg and 'user' in msg:
                service.storage_for(bot).last = msg

                url_format = "(https://twitter.com/{0[user][screen_name]}/status/{0[id_str]})"
                if 'retweeted_status' in msg:
                    text = "\x02[@{0[user][screen_name]} RT @{0[retweeted_status][user][screen_name]}]\x02 {0[retweeted_status][text]} " + url_format
                else:
                    text = "\x02[@{0[user][screen_name]}]\x02 {0[text]} " + url_format

                _announce(bot, text.format(msg))
        else:
            time.sleep(.5)

        if not service.storage_for(bot).active:
            return
Example #4
    def stream(self):
        """Listens to your feed, and updates it whenever
           someone posts a new tweet."""
        twitter_stream = TwitterStream(auth=authenicate(), 
                                      domain='userstream.twitter.com')       

        for data in twitter_stream.user():
            self.feed.values = self.update_feed(data)
Example #5
    def open_stream(self):
        """
        Opens an interface to the Twitter API and opens a stream.
        """
        t = Twitter(auth=self.auth)
        ts = TwitterStream(domain='userstream.twitter.com', auth=self.auth)

        self.twitter = t
        self.stream = ts.user()
        self.iterator = iter(self.stream)
Example #6
 def GetTimeLineSteam(self, compositionRunner=None, block=True):
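     # Keep the user stream open indefinitely; on any error, wait 60 seconds and reconnect.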
     while True:
         try:
             twitter_stream = TwitterStream(
                 domain="userstream.twitter.com",
                 api_version="1.1",
                 auth=OAuth(oauth_token, oauth_secret, CONSUMER_KEY, CONSUMER_SECRET),
                 block=block)
             iterator = twitter_stream.user()
             for tweet in iterator:
                 self.ProcessTweet(tweet, compositionRunner)
         except Exception:
             time.sleep(60)
Example #7
def _follow_userstream(ctx):
    o = ctx.config.oauth._fields
    stream = TwitterStream(auth=twitter.OAuth(**o), domain="userstream.twitter.com", block=False)

    reconnect_seconds = [2, 10, 60, 300]
    reconnect_tries = 0

    while ctx.storage.active:
        try:
            for msg in stream.user():
                if msg is not None:
                    service.logger.debug(str(msg))

                    # Twitter signals start of stream with the "friends" message.
                    if 'friends' in msg:
                        _announce(ctx, "\x02twitter:\x02 This channel is now streaming Twitter in real-time.")
                        reconnect_tries = 0
                    elif 'text' in msg and 'user' in msg:
                        memorize_id(ctx, msg["id_str"])
                        ctx.storage.last = msg

                        url_format = "(https://twitter.com/{0[user][screen_name]}/status/{0[id_str]})"
                        if 'retweeted_status' in msg:
                            text = "\x02[@{0[user][screen_name]} RT @{0[retweeted_status][user][screen_name]}]\x02 {0[retweeted_status][text]} " + url_format
                        else:
                            text = "\x02[@{0[user][screen_name]}]\x02 {0[text]} " + url_format

                        _announce(ctx, text.format(msg))
                else:
                    time.sleep(.5)

                if not ctx.storage.active:
                    return

            _announce(ctx, "\x02twitter:\x02 Twitter userstream connection lost! Waiting {time} seconds to reconnect.".format(
                            time=reconnect_seconds[reconnect_tries]
                        ))
        except Exception as e:
            _announce(ctx, "\x02twitter:\x02 Exception thrown while following userstream! Waiting {time} seconds to reconnect.".format(
                            time=reconnect_seconds[reconnect_tries]
                        ))
            _announce(ctx, "↳ {name}: {info}".format(
                            name=e.__class__.__name__,
                            info=str(e)
                        ))

        time.sleep(reconnect_seconds[reconnect_tries])
        reconnect_tries = min(reconnect_tries + 1, len(reconnect_seconds) - 1)
Example #8
def init():
    oauth_client = Oauth(config.get('oauth', 'consumer_key'),
                         config.get('oauth', 'consumer_secret'),
                         config.get('oauth', 'request_token_url'),
                         config.get('oauth', 'access_token_url'),
                         config.get('oauth', 'authorize_url'),
                         version=config.get('oauth', 'version'))

    request = Request(url=config.get('twitter', 'streaming_filter_url'),
                      method="POST",
                      is_streaming=True,
                      headers={'Accept-Encoding': 'deflate, gzip '},
                      payload={'locations': '-118.39,30.41,-59.61,49.46'},
                      token=token)

    max_stream = int(config.get('twitter', 'max_stream_responses'))
    topic = config.get('kafka', 'topic')
    max_skip_invalid_responses = config.getint('twitter', 'max_skip_invalid_response')
    skip_invalid_responses = config.getboolean('twitter', 'skip_invalid')
    producer = KeyedProducer(kafka_client, async=True)

    twitter = TwitterStream(oauth_client, json)
    tweets = twitter.get_tweets(request)

    # Starts here.
    try:
        if max_stream < 0:
            send_unlimited_messages(tweets, producer, topic)
        else:
            send_limited_messages(max_stream,
                                  tweets,
                                  producer,
                                  topic,
                                  skip_invalid_responses,
                                  max_skip_invalid_responses)
    except Exception as e:
        print e
    finally:
        producer.stop()
        kafka_client.close()
Example #9
def main():
    # open up a file and get a list of lines of lyrics (no blank lines)
    with open(LYRICS) as lyrics_file:
        lyrics = [line.strip() for line in lyrics_file if line != "\n"]

    # print out our list of lyrics (for diagnostics)
    pprint(lyrics)

    # get twitter api ready
    auth = OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET)
    t = Twitter(auth=auth)
    ts = TwitterStream(domain='userstream.twitter.com', auth=auth)

    # open up our user's stream
    stream = ts.user()

    # iterate through every event
    for tweet in stream:

        # Print it out nicely, so we can see what happens.
        pprint(tweet)

        if 'event' in tweet:
            print('received event %s' % tweet['event'])

        elif 'text' in tweet and tweet['user']['screen_name'] != USERNAME:

            # 'text' means that this is a tweet.  If the screen name wasn't our
            # own, this is someone tweeting at us.

            # print out the important bits
            print('from @%s: %s' % (tweet['user']['screen_name'], tweet['text']))

            # Pick a lyric, compose a reply, and send it!
            line = random.choice(lyrics)
            print('responding with line: %s' % line)
            reply = '@' + tweet['user']['screen_name'] + ' ' + line
            t.statuses.update(status=reply, in_reply_to_status_id=tweet['id'])
Example #10
 def _get_iterator(self):
     """Returns twitter stream iterator object."""
     try:
         return TwitterStream(auth=self.oauth).statuses.sample()
     except Exception as e:
         raise e
Example #11
 def _get_stream(self):
     """Returns twitter stream object."""
     try:
         return TwitterStream(auth=self.oauth)
     except Exception as e:
         raise e
Example #12
 def _setup_twitter_stream(self):
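     # Build the TwitterStream client once, using credentials from _get_oauth().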
     self._twitter_stream = TwitterStream(auth=self._get_oauth())
Example #13
        os.environ['CONSUMER_KEY'],
        os.environ['CONSUMER_SECRET']
    )
    SHORTE_ST_TOKEN = os.environ['SHORTE_ST_TOKEN']
except KeyError:  # For local tests.
    with open('credentials', 'r') as secret:
        exec(secret.read())
        oauth = OAuth(
            ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET
        )

t = Twitter(auth=oauth)
# For uploading photos.
t_upload = Twitter(auth=oauth, domain="upload.twitter.com")

ts = TwitterStream(auth=oauth)
tu = TwitterStream(auth=oauth, domain="userstream.twitter.com")

# Following are some useful wrappers for Twitter-related functionalities.


def pf(sn):
    """
    Attempts to print the followers of a user, provided their
    screen name.
    """

    cursor = -1
    next_cursor = 1
    while cursor != 0:
        followers = t.followers.list(screen_name=sn, cursor=cursor)
Example #14
# Twitter user
user = "******"

if __name__ == '__main__':

    try:

        oauth = OAuth(access_token, access_token_secret, consumer_key,
                      consumer_secret)

        # Connect to Twitter Streaming API
        #twitter_stream = TwitterStream(auth = oauth)

        # UNCOMMENT when ready to test

        twitter_stream = TwitterStream(auth=oauth, secure=True)
        # Get an iterator on the public data following through Twitter
        #tweet_iterator = twitter_stream.statuses.filter(locations='-180,-90,180,90')
        #print(json.loads(twitter_stream))
        # UNCOMMENT when ready to test
        tweets = twitter_stream.statuses.filter(track=user)

        for tweet in tweets:
            #print json.dumps(tweet, indent=2, sort_keys=True)
            #entities = tweet.get("entities")
            entities = tweet.get("extended_entities")
            print json.dumps(entities, indent=2, sort_keys=True)
            if (entities):
                print json.dumps(entities, indent=2, sort_keys=True)
                media_list = entities.get("media")
                if (media_list):
Example #15
def get_iterable():
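    # Return an iterator over a random sample of public statuses.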
    oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    twitter_stream = TwitterStream(auth=oauth)
    return twitter_stream.statuses.sample()
Example #16
def getData():
    #getData() - grabs data from the Twitter API and parses it before inserting it into the database
    conn = psycopg2.connect(
        "dbname='kkosyka_db' host='localhost' user='******' password='******'"
    )  #(database information - database, host, user, password)
    cur = conn.cursor()

    # Import the necessary methods from "twitter" library
    from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

    # Variables that contains the user credentials to access Twitter API
    # Access values under Kalynn Kosyka, one may need to change for their projects
    ACCESS_TOKEN = '28930526-ttro9V7TUvuUfXMe4e3OBMlU38MuKn9ISLUwqMvP9'
    ACCESS_SECRET = 'dI0t4RRSJU53FciGw1jYfApDkx1x3znrWwH9zSdfetQjh'
    CONSUMER_KEY = '3KUdtFeceeLB3rs3pJDe4fbeM'
    CONSUMER_SECRET = 'vPps0BgF2Vm0UZXKdi67URWUnIl5ygk1m5KLRHbXVWwGHCoej1'
    oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

    # Initiate the connection to Twitter Streaming API
    twitter_stream = TwitterStream(auth=oauth)
    twitter = Twitter(auth=oauth)

    #Creates a new file that will contain the n most recent tweets that contain a particular hashtag
    newFile = writeFile
    fileName = open(newFile, "a")
    # Getting data from Twitter and going through the data
    json_input = twitter.search.tweets(
        q='#sunset', result_type='recent', lang='en',
        count=100)  #change q value for hashtag query
    json_input = json.dumps(json_input)
    s = sched.scheduler(time.time, time.sleep)
    try:
        decoded = json.loads(json_input)
        print len(decoded["statuses"])
        numCoor = 0
        for x in range(0, len(decoded["statuses"])):
            coor = False
            text = ((decoded["statuses"][x]["text"]).encode('ascii', 'ignore'))
            text = text.replace(",", "/")
            text = str(text.replace("\n", "<br/>"))
            coorX = "null"
            coorY = "null"
            coordinates = str(decoded["statuses"][x]["coordinates"]).encode(
                'ascii', 'ignore')
            if not len(coordinates) <= 4:  #if there are coordinates
                coor = True
                numCoor += 1
                coordinates = str(decoded["statuses"][x]["coordinates"]
                                  ["coordinates"]).encode('ascii', 'ignore')
                coordinates = coordinates.split(",")
                coorX = (coordinates[0][1:len(coordinates[0])])
                coorY = (coordinates[1][0:len(coordinates[1]) - 1])
            if not coor:  # filtering so we only get tweets with coordinates, otherwise we skip them
                continue
            else:
                screenName = (
                    decoded["statuses"][x]["user"]["screen_name"]).encode(
                        'ascii', 'ignore')
                createdAt = (decoded["statuses"][x]["created_at"]).encode(
                    'ascii', 'ignore')
                hashtags = []
                for i in range(
                        0,
                        len(decoded["statuses"][x]["entities"]["hashtags"])):
                    hashtags.append(decoded["statuses"][x]["entities"]
                                    ["hashtags"][i]["text"])
            print "num coor:\n" + str(numCoor)
            print "---------------------"

            #Write data in CSV format - text, coorX, coorY, username, created at, hashtag(s)
            #if one wants to save the data into a csv file, uncomment line below
            #fileName.write(text + "," + coorX +","+ coorY + ","+ screenName +"," + createdAt + ",")

            hashtagsHolder = ""
            for j in range(0, len(hashtags)):
                fileName.write("#" + hashtags[j] + " ")
                hashtagsHolder = hashtagsHolder + "#" + hashtags[j] + " "

            printPretty(text, coorX, coorY, screenName, createdAt,
                        hashtagsHolder
                        )  #print data into console, comment out if not needed

            #Insert data into database - assuming database already exists
            #database - text(text), xcoor(numeric), ycoor(numeric), username(text), created(text), hashtags(text), twitterGeom (geometry - SRID 4326)
            cur.execute(
                """SELECT EXISTS(SELECT 1 FROM public."TwitterDataSample" WHERE text=%s AND xcoor=%s AND ycoor=%s AND username=%s AND created=%s AND hashtags=%s ) """,
                (text, coorX, coorY, screenName, createdAt, hashtagsHolder))

            if cur.fetchone()[0] == False:
                cur.execute(
                    """INSERT INTO public."TwitterDataSample"(text, xcoor, ycoor, username, created, hashtags) VALUES
                        (%s,%s,%s, %s, %s, %s)""",
                    (text, coorX, coorY, screenName, createdAt,
                     hashtagsHolder))
            #Using and converting coordinate values into geometry value with SRID 4326
            cur.execute(
                """UPDATE public."TwitterDataSample" SET "twitterGeom" = ST_GeomFromText('POINT('||xcoor::text||' '||ycoor::text||')', 4326)"""
            )
            conn.commit()

    except (ValueError, KeyError, TypeError):
        fileName.write("JSON format error")
    fileName.close()
    #print stars, used for pretty printing and dividing info in console - can be commented out
    print "*********************************************************"
    s.enter(3600, 1, getData, ())  #run every x sec, 3600s = 1hr
    s.run()
Example #17
import json
#import simplejson as json
import twitter
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

ACCESS_TOKEN = '1355xxxxx'
ACCESS_SECRET = 'xxxxxxxx'
CONSUMER_KEY = '1xxxxxxxNR'
CONSUMER_SECRET = 'xxxxLLU'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

twitter_data = TwitterStream(auth=oauth)
twitter_rest_data = Twitter(auth=oauth)
getT = twitter_rest_data.search.tweets(q='#Hillary')

iterator = twitter_data.statuses.sample()
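# Write the first 10 sampled tweets to out.txt as indented JSON.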
file = open('out.txt', 'w')
tweet_count = 10
for tweet in iterator:
    tweet_count -= 1
    pr = json.dumps(tweet, indent=4)
    file.write(pr)
    if tweet_count <= 0:
        break
file.close()
Example #18
if __name__ == "__main__":
    # Get credentials
    credentials = get_credentials()
    output_file = open("output_tweets.json", "a")
    # Get authentication
    auth = OAuth(credentials["ACCESS_TOKEN"], credentials["ACCESS_SECRET"],
                 credentials["CONSUMER_KEY"], credentials["CONSUMER_SECRET"])
    print("Start getting tweets")
    # Set up twitter stream
    keywords = [
        'Flu', 'Zika', 'Ebola', 'Diarrhea', 'Headache', 'Measles', 'flu',
        'zika', 'ebola', 'diarrhea', 'headache', 'measles'
    ]
    while True:
        try:
            stream = TwitterStream(auth=auth, secure=True)
            tweets = stream.statuses.filter(track=keywords)
            for tweet in tweets:
                output_file.write(json.dumps(tweet) + "\n")
                # Display some tweet information
                print(json.dumps(tweet))
                print("ID: " + str(tweet["id"]))
                print("User: " + str(tweet["user"]["screen_name"]))
                print("Text: " + str(tweet["text"]))
        except TwitterError as e:
            # If limit is reached wait 5 minutes
            print(e)
            time.sleep(300)
        except KeyboardInterrupt:
            print("Program killed")
            output_file.close()
Example #19
loc_pad = 0.01

auth = OAuth(
    creds["access_token"],
    creds["access_token_secret"],
    creds["consumer_key"],
    creds["consumer_secret"]
)

twitter = Twitter(auth=auth)

t_up = Twitter(domain='upload.twitter.com',
    auth=auth)

twitter_stream = TwitterStream(auth=auth, domain="userstream.twitter.com" )

for tweet in twitter_stream.user():
    # print msg

# with open("test.json", "rb") as testf:
#     tweet = json.load(testf)

    # print json.dumps(tweet,indent=2)

    if "place" in tweet and tweet["place"] is not None:
        print "Tweet From", tweet["user"]["screen_name"], tweet["place"]
        bb = tweet["place"]["bounding_box"]
        if bb["type"] == "Polygon":
            min_lat = 90
            max_lat = -90
Example #20
def getData():
    global roundCount

    # Import the necessary methods from "twitter" library
    from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

    # Variables that contains the user credentials to access Twitter API
    ACCESS_TOKEN = '28930526-ttro9V7TUvuUfXMe4e3OBMlU38MuKn9ISLUwqMvP9'
    ACCESS_SECRET = 'dI0t4RRSJU53FciGw1jYfApDkx1x3znrWwH9zSdfetQjh'
    CONSUMER_KEY = '3KUdtFeceeLB3rs3pJDe4fbeM'
    CONSUMER_SECRET = 'vPps0BgF2Vm0UZXKdi67URWUnIl5ygk1m5KLRHbXVWwGHCoej1'

    oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

    # Initiate the connection to Twitter Streaming API
    twitter_stream = TwitterStream(auth=oauth)
    twitter = Twitter(auth=oauth)

    #Creates a new file that will contain the 30 most recent tweets that contain #smithcollege
    newFile = writeFile
    fileName = open(newFile, "a")  #"a"
    #fileName.write("text, coorX, coorY, username, created at, hashtag(s) \n")
    """
    if (roundCount == 0):
        print "yes"
        numCount = 100
    else:
        numCount=5
        print "no"

    print numCount
    """
    json_input = twitter.search.tweets(q='#sunset',
                                       result_type='recent',
                                       lang='en',
                                       count=100)
    json_input = json.dumps(json_input)
    s = sched.scheduler(time.time, time.sleep)
    try:
        decoded = json.loads(json_input)

        print len(decoded["statuses"])
        numCoor = 0
        for x in range(0, len(decoded["statuses"])):
            coor = False
            text = ((decoded["statuses"][x]["text"]).encode('ascii', 'ignore'))
            text = text.replace(",", "/")
            text = text.replace("\n", "<br/>")
            coorX = "null"
            coorY = "null"
            coordinates = str(decoded["statuses"][x]["coordinates"]).encode(
                'ascii', 'ignore')
            if not len(coordinates) <= 4:  #if there are coordinates
                coor = True
                numCoor += 1
                coordinates = str(decoded["statuses"][x]["coordinates"]
                                  ["coordinates"]).encode('ascii', 'ignore')
                coordinates = coordinates.split(",")
                coorX = coordinates[0][1:len(coordinates[0])]
                coorY = coordinates[1][0:len(coordinates[1]) - 1]
            if not coor:  #filtering so we only get tweets with coordinates, otherwise we skip them
                continue
            else:
                screenName = (
                    decoded["statuses"][x]["user"]["screen_name"]).encode(
                        'ascii', 'ignore')
                createdAt = (decoded["statuses"][x]["created_at"]).encode(
                    'ascii', 'ignore')
                hashtags = []
                for i in range(
                        0,
                        len(decoded["statuses"][x]["entities"]["hashtags"])):
                    hashtags.append(decoded["statuses"][x]["entities"]
                                    ["hashtags"][i]["text"])
            print "num coor:\n" + str(numCoor)
            print "---------------------"

            #write file in CSV format
            fileName.write(text + "," + coorX + "," + coorY + "," +
                           screenName + "," + createdAt + ",")
            for j in range(0, len(hashtags)):
                fileName.write("#" + hashtags[j] + " ")
            fileName.write("\n")

    except (ValueError, KeyError, TypeError):
        fileName.write("JSON format error")

    fileName.close()
    print "*********************************************************"
    roundCount += 1
    s.enter(1800, 1, getData, ())  #1800
    s.run()
Example #21
try:
    import json
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

access_token = "841163774983884800-7MKXWJE5AZrfEGZ7GxupUl7iL35a7VE"
access_secret = "VlinyeyHnEBHTUwZ9RCCLCaUBO9C7F99NddEgRfbVxovf"
consumer_key = "QiUIFH5wLD1xoTy2KnAVS8J6y"
consumer_secret = "cw7gtMfHxFrxuHr1zzPlxiSpTt80TXHXxCWbrcPKUg9RKRSgY8"

oauth = OAuth(access_token, access_secret, consumer_key, consumer_secret)

twitter_stream = TwitterStream(auth=oauth, domain="userstream.twitter.com")

iterator = twitter_stream.statuses.filter(track="walmart")

tweet_count = 10000
for tweet in iterator:
    tweet_count -= 1
    print(json.dumps(tweet))

    if tweet_count <= 0:
        break
Example #22
def create_tweet_iterator(token, token_secret, consumer_key, consumer_secret, bounding_box):
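    # Stream only tweets geotagged within the supplied bounding box.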
    oauth = OAuth(token, token_secret, consumer_key, consumer_secret)
    twitter_stream = TwitterStream(auth=oauth)
    tweet_iterator = twitter_stream.statuses.filter(locations=bounding_box)
    return tweet_iterator
Example #23
    def handle(self, *args, **options):
        if not BaseKeyword.objects.count():
            raise CommandError('No keywords found!')
        keywords = ','.join([k['term'] for k in BaseKeyword.objects.values('term')])
        twitter_stream = TwitterStream(auth=OAuth(
            token=settings.TWITTER_TOKEN,
            token_secret=settings.TWITTER_TOKEN_SECRET,
            consumer_key=settings.TWITTER_CONSUMER_KEY,
            consumer_secret=settings.TWITTER_CONSUMER_SECRET)
        )
        stream = twitter_stream.statuses.filter(track=keywords)

        for tweet in stream:
            if 'retweeted_status' in tweet:
                # If this is a retweet of an earlier tweet, then we want to check only the original.
                tweet = tweet['retweeted_status']
            user = tweet['user']

            author = Account(
                twitter_id=user['id_str'],
                screen_name=user['screen_name'],
                name=user['name'],
                url=user['url'] if 'url' in user else None,
                status_count=user['statuses_count'] if 'statuses_count' in user else 0,
                follower_count=user['followers_count'] if 'followers_count' in user else 0,
                following_count=user['friends_count'] if 'friends_count' in user else 0,
                listed_in_count=user['listed_count'] if 'listed_count' in user else 0,
                is_verified=user['verified'] if 'verified' in user else False
            )

            if (tweet['retweet_count'] and tweet['favorite_count'] and
                    (author.get_weight() > 1000 or tweet['entities']['urls'])):
                # Some debug prints, visual confirmation :)
                print '-=' * 45
                print tweet['text'].encode('ascii', 'ignore')
                print tweet['created_at'], tweet['favorite_count'], tweet['retweet_count'], author.get_weight()

                try:
                    author.save()
                except IntegrityError:
                    author = Account.objects.get(twitter_id=user['id_str'])

                mentions = list()

                if tweet['entities']['user_mentions']:
                    for user in tweet['entities']['user_mentions']:
                        try:
                            (mention, created) = Account.objects.get_or_create(
                                twitter_id=user['id_str'],
                                screen_name=user['screen_name'],
                                name=user['name'],
                                url=user['url'] if 'url' in user else None,
                                status_count=user['statuses_count'] if 'statuses_count' in user else 0,
                                follower_count=user['followers_count'] if 'followers_count' in user else 0,
                                following_count=user['friends_count'] if 'friends_count' in user else 0,
                                listed_in_count=user['listed_count'] if 'listed_count' in user else 0,
                                is_verified=user['verified'] if 'verified' in user else False
                            )
                        except IntegrityError:
                            mention = Account.objects.get(twitter_id=user['id_str'])
                        mentions.append(mention)

                # try:
                tw, created = Tweet.objects.get_or_create(
                    tweet_id=tweet['id_str'],
                    defaults=dict(
                        author=author,
                        text=tweet['text'],
                        created_at=parser.parse(tweet['created_at']),
                        favorite_count=tweet['favorite_count'],
                        retweet_count=tweet['retweet_count']
                    )
                )
                # except IntegrityError:
                #     tw = Tweet.objects.get(tweet_id=tweet['id_str'])

                for user in mentions:
                    tw.mentions.add(user)
            else:
                continue
Example #24
                 auth=OAuth2(conf['twitter']['key'], conf['twitter']
                             ['secret'])).oauth2.
         token(grant_type="client_credentials"))['access_token'])
     SearchConn = Twitter(domain="api.twitter.com",
                          api_version="1.1",
                          format="json",
                          auth=oauth2,
                          secure=True)
     ResConn = Twitter(domain="api.twitter.com",
                       api_version="1.1",
                       format="json",
                       auth=oauth,
                       secure=True)
     StreamConn = TwitterStream(domain="stream.twitter.com",
                                api_version="1.1",
                                auth=oauth,
                                secure=True,
                                block=False,
                                timeout=10)
 except Exception as e:
     log(
         'ERROR', 'Could not initiate connections to Twitter API: %s %s' %
         (type(e), e))
     sys.exit(1)
 try:
     locale = timezone(conf['timezone'])
 except:
     log('ERROR', "\t".join(all_timezones) + "\n\n")
     log(
         'ERROR',
         'Unknown timezone set in config.json: %s. Please choose one among the above ones.'
         % conf['timezone'])
Example #25
def preprocessing():
    global lista

    query = entry_1.get()
    if len(entry_1.get()) == 0:
        tkMessageBox.showinfo("Ooops", "Please enter a query!")
        return

    count = entry_2.get()
    if len(entry_2.get()) == 0:
        tkMessageBox.showinfo("Ooops", "Please enter a number!")
        return

    try:
        tweet_count = int(count)
    except:
        tkMessageBox.showinfo("Ooops", "Please enter a number!")
        return

    double = tweet_count
    print('Saving tweets')

    szoveg = ""
    labellist = [Variable() for i in range(double)]

    # Initiate the connection to Twitter Streaming API
    twitter_stream = TwitterStream(auth=oauth)
    iterator = twitter_stream.statuses.filter(track=query, languages='en')
    for tweet in iterator:

        print("-----------------------------------")
        data = json.loads(json.dumps(tweet))
        text = data["text"].encode('utf-8')
        re00 = re.sub(r'\\', '', text)
        re001 = re.sub(r'http\S+', '', re00)
        re002 = re.sub(r'www.\S+', '', re001)
        p.set_options(p.OPT.URL, p.OPT.EMOJI)
        cleantweet = p.clean(re002)

        re1 = re.sub(r'@\S+', '', cleantweet)
        re2 = re.sub(r'RT', '', re1)  #retweeted
        re3 = re.sub(r'[^a-zA-Z,.?!;: ]', '', re2)
        re4 = re.sub(r'[,.?!:;]', ' ', re3)
        re5 = re.sub(r'(.)\1+', r'\1\1', re4).lower()

        if re4 or not re5.isspace():
            lista.append(re5)
            tweet_count -= 1

        szoveg = szoveg + str(re5) + '\n'
        if tweet_count <= 0:
            break

    #----------------tweets
    #print(szoveg)

    frame.grid(columnspan=4)
    label1 = Label(frame, text="tweets")
    label1.grid(row=0, column=0)

    j = 0
    for i in labellist:
        l = Label(frame, text=lista[j])
        l.grid(row=j + 1, column=0)
        j = j + 1

    #------------checkbuttons
    label2 = Label(frame, text="Check if positive ")
    label2.grid(row=0, column=1)

    checklist = [IntVar() for i in range(double)]
    for i in checklist:
        i.set(0)

    #print(double)
    j = 1
    for i in checklist:
        c = Checkbutton(frame, variable=i, command=checked)
        c.grid(row=j, column=1)
        j = j + 1

    buttonProcess.config(state=DISABLED)
    #-----------algorithms
    buttonBayes = Button(frame, text='Naive Bayes', command=NaiveBayes)
    buttonBayes.grid(row=0, column=2)

    buttonSvm = Button(frame, text='Svm', command=Svm)
    buttonSvm.grid(row=0, column=3)

    buttonKnn = Button(frame, text='Knn', command=Knn)
    buttonKnn.grid(row=0, column=4)
Example #26
def realTweets():
    global lc

    try:

        stream = TwitterStream(auth=oauth)
        tweets = stream.statuses.filter(track=TRIGGER_TEXT)
        #twitterInterface = Twitter(auth=oauth)

        showReady()
        print("Ready! Listening for tweets...")

        for tweet in tweets:

            print("Trigger tweet received: " + tweet['text'] + " - " + tweet['user']['name'])
            messageBack = "Hey @" + tweet['user']['screen_name'] + "! Thanks for your tweet. You made my "
            if "jumper flash" in tweet['text'].lower():
                messageBack = messageBack + "whole jumper flash! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.flashAllTogether(ACTIVITY_TIME)
            elif "jumper chase" in tweet['text'].lower():
                messageBack = messageBack + "lights flash up and down! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.flashAllSequence(ACTIVITY_TIME)
            elif "balls flash" in tweet['text'].lower():
                messageBack = messageBack + "bauble lights flash! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.flashBalls(ACTIVITY_TIME)
            elif "star flash" in tweet['text'].lower():
                messageBack = messageBack + "star lights flash! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.flashStar(ACTIVITY_TIME)
            elif "tree flash" in tweet['text'].lower():
                messageBack = messageBack + "tree flash! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.flashTree(ACTIVITY_TIME)
            elif "lights flash" in tweet['text'].lower():
                messageBack = messageBack + "tree flash! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.flashTree(ACTIVITY_TIME)
            elif "jumper on" in tweet['text'].lower():
                messageBack = messageBack + "whole jumper light up! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.on(ACTIVITY_TIME)
            elif "balls on" in tweet['text'].lower():
                messageBack = messageBack + "bauble lights light up! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.ballsOn()
                sleep(ACTIVITY_TIME)
                lc.ballsOff()
            elif "lights on" in tweet['text'].lower():
                messageBack = messageBack + "tree lights turn on! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.treeOn()
                sleep(ACTIVITY_TIME)
                lc.treeOff()
            elif "tree on" in tweet['text'].lower():
                messageBack = messageBack + "tree lights turn on! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.treeOn()
                sleep(ACTIVITY_TIME)
                lc.treeOff()
            elif "star on" in tweet['text'].lower():
                messageBack = messageBack + "star light up! #ChristmasJumper #Shiplake7 #Creative"
                # twitterInterface.statuses.update(status=messageBack)
                lc.starOn()
                sleep(ACTIVITY_TIME)
                lc.starOff()


    except:
        showError()
        print("Error connecting to Twitter. Trying again in 10 seconds.")
        sleep(10)
        realTweets()
Example #27
from twitter import oauth_dance, read_token_file, TwitterStream, OAuth
import os
import matplotlib.pyplot as plt
import matplotlib.style as style
import pandas as pd

CONSUMER_KEY = 'uqiCSPB5CYtMXYN4wV2LUkwiL'
CONSUMER_SECRET = 'q3sSeTZNKKAc4eladDvyVlyDctL2066ht36wpwjYjpByLgWKQJ'

MY_TWITTER_CREDS = os.path.expanduser('~/.my_app_credentials')
if not os.path.exists(MY_TWITTER_CREDS):
    oauth_dance("SentimentVisualizer", CONSUMER_KEY, CONSUMER_SECRET,
                MY_TWITTER_CREDS)

oauth_token, oauth_secret = read_token_file(MY_TWITTER_CREDS)
twitter_stream = TwitterStream(
    auth=OAuth(oauth_token, oauth_secret, CONSUMER_KEY, CONSUMER_SECRET))

iterartor = twitter_stream.statuses.filter(track='weiner')

stop_list = [
    "a", "about", "above", "after", "again", "against", "all", "am", "an",
    "and", "any", "are", "aren't", "as", "at", "be", "because", "been",
    "before", "being", "below", "between", "both", "but", "by", "can't",
    "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't",
    "doing", "don't", "down", "during", "each", "few", "for", "from",
    "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having",
    "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself",
    "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've",
    "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's",
    "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of",
    "off", "on", "once", "only", "or", "other", "ought", "our", "ours",
Example #28
try:
    import json
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
ACCESS_TOKEN = '**************************************************'
ACCESS_SECRET = '*****************************************'
CONSUMER_KEY = '*********************'
CONSUMER_SECRET = '**************************************************'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
twitter_userstream = TwitterStream(auth=oauth, domain='userstream.twitter.com')
iterator = twitter_userstream.statuses.filter(track="Google", language="en")
tweet_count = 1
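# Print each matching tweet as raw JSON until the counter runs out.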
for tweet in iterator:
    tweet_count -= 1
    print(json.dumps(tweet))
    print("\t")
    if tweet_count <= 0:
        break
Example #29
    if connect:
        body = '{"order":0,"template":"*","settings":{},"mappings":{"_default_":{"dynamic_templates":[' \
                      '{"string_fields":{"mapping":{"index":"analyzed","type":"string","fields":{"raw":{' \
                      '"index":"not_analyzed","type":"string"}}},"match_mapping_type":"string","match":"*"}}]'\
                      ',"_all":{"enabled":true}}},"aliases":{}}'
        template = interface.exists_template(ELASTICSEARCH['template'], )
        if template:
            print('Mapping exists, using it.')
        else:
            print('Creating map for use!')
            interface.put_template(name=ELASTICSEARCH['template'], body=body)


if __name__ == '__main__':
    print('TWEPY - Twitter to Elasticsearch Interface with Python')
    stream = TwitterStream(auth=auth())
    tweet_iter = stream.statuses.sample()
    template_es()

    for tweet in tweet_iter:
        if 'delete' in tweet.keys():
            pass

        else:
            timestamp = return_datetime(tweet['created_at'])

            hashtags = return_hashtags(tweet)
            use_hashtags = contains_in_list(hashtags)

            user_mentions = return_user_mentions(tweet)
            use_mentions = contains_in_list(user_mentions)
Example #30
 def __init__(self, ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET):
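     # Authenticate with the streaming API and set up the Analyser helper.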
     oAuth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
     self.twitter_stream = TwitterStream(auth=oAuth)
     self.analyser = Analyser(TA_ACCESS_KEY)
Example #31
 def get_data(self, track, count=10):
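     # Filter the public stream by the given keyword (English only) and hand the iterator to get_relevant_data.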
     twitter_stream = TwitterStream(auth=self.create_auth())
     iterator = twitter_stream.statuses.filter(track=track, language="en")
     return self.get_relevant_data(iterator, count)
Example #32
# Also post images in replies.

try:
    OAUTH = OAuth(os.environ['TW_ACCESS_TOKEN'],
                  os.environ['TW_ACCESS_SECRET'],
                  os.environ['TW_CONSUMER_KEY'],
                  os.environ['TW_CONSUMER_SECRET'])
    SHORTE_ST_TOKEN = os.environ['SHORTE_ST_TOKEN']
except KeyError:  # For local runs.
    with open('.env', 'r') as secret:
        exec(secret.read())
        OAUTH = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY,
                      CONSUMER_SECRET)

ACCOUNT_HANDLER = Twitter(auth=OAUTH)
STREAM_HANDLER = TwitterStream(auth=OAUTH)


def main():
    """Main function to handle different activities of the account."""

    streamer = managers.StreamThread(
        STREAM_HANDLER, ACCOUNT_HANDLER)  # For the troubling part.
    account_manager = managers.AccountThread(
        ACCOUNT_HANDLER)  # For retweets, likes, follows.
    streamer.start()
    account_manager.run()


# Execute the main() function only if script is executed directly.
if __name__ == "__main__":
Example #33
from twitter import Twitter
from twitter import OAuth
from twitter import TwitterHTTPError
from twitter import TwitterStream

ck = 'CP7fgUIajeNTjx2GWAOw8gJLn'
cs = 'EW8cDRlfKrF3D91n1OdwqZPtWs2AVy3MqFH7Zxm7usx3f9qkJT'
at = '498725176-adTcq6fMyqlzvEINcg8ujCxUT2f4TafNsLJFg2yx'
ats = 'q94CVXaaAmHXuhQqjL4b26Q5Vdl5lx5PJhQT8f4M6nvfm'

oauth = OAuth(at, ats, ck, cs)
twit_api = Twitter(auth=oauth)
t_loc = twit_api.trends.available()
t_loc

ts = TwitterStream(auth=oauth)

iterator = ts.statuses.filter(track="Bitcoin", language="en")

b = []
for t in iterator:
    b.append(t)
    if len(b) == 50:
        break
len(b)

import json
from pandas.io.json import json_normalize

df = json_normalize(b)
Example #34
# when repo is made public, the keys and tokens will be replaced with placeholders
auth = OAuth(
    consumer_key='2CE1E6U7odFK1MFWeCnOPIh5R',
    consumer_secret='SqqWIvcMGdLbwAqu2oSBzsCr4379aSITLy4AsA9HZyPQxYqCl6',
    token='796842527487889409-hY298XB4dZGxBLU2blhpCVMz14UPQo8',
    token_secret='E9CmwGNpDNffxzU7NjuXernjofYSEF6RyjEKiVantXJap')
# auth = OAuth(  # keys for [email protected] , secondary test account with same login
#     consumer_key='PfV0xdYWs55kstAO4PHF1kIHt',
#     consumer_secret='wYtyvj7EaHBWftLCR8sfYBJKQISu4PhhWszIuLACo0I4jqBgAi',
#     token='792039779068157952-HxKthF9JlcGtDYEiHfT1bn456tJKNLE',
#     token_secret='Fl24QTmnau3vQB3svxDBnepwTL4ifGHvLJVD52PXKXh99'
# )

t = Twitter(auth=auth)

twitter_userstream = TwitterStream(auth=auth, domain='userstream.twitter.com')


def insert_to_database(tweet_obj):
    conn = pymysql.connect(host='localhost',
                           user='******',
                           passwd='thisisthepassword',
                           db='thereminderbot')
    cursor = conn.cursor()
    reminder_str = "INSERT INTO reminders (SENDER, HOUR, MINUTE, PERIOD, " \
                   "TIME_ZONE, MONTH, DAY, MSG, FOLLOWING) VALUES ('{0}', {1}," \
                   " {2}, '{3}', '{4}', {5}, {6}, '{7}', {8});".format(tweet_obj.sender,
                                                                       tweet_obj.hour, tweet_obj.minute, tweet_obj.period,
                                                                       tweet_obj.time_zone, tweet_obj.month, tweet_obj.day, tweet_obj.msg,
                                                                       tweet_obj.following)
    cursor.execute(reminder_str)
Example #35
# Variables that contains the user credentials to access Twitter API
ACCESS_TOKEN = '611030781-0VuUBdJqtq5KLMKYJ1k6UOqXFXblHhmwuGFNdwVZ'
ACCESS_SECRET = 'e1GcCCWKpndCiYvBvzNmgxbCksGb7ktzI1Ne1wdDNZT1n'
CONSUMER_KEY = 'xfg7okUYfazB5t31i9CtRCMkq'
CONSUMER_SECRET = 'qt2ngmhSHDXvAZZBbJK4xc2X6WYHNTduFUWkcVFoY5q5Gxne33'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

auth = tweepy.OAuthHandler(
    'xfg7okUYfazB5t31i9CtRCMkq',
    'qt2ngmhSHDXvAZZBbJK4xc2X6WYHNTduFUWkcVFoY5q5Gxne33')
auth.set_access_token('611030781-0VuUBdJqtq5KLMKYJ1k6UOqXFXblHhmwuGFNdwVZ',
                      'e1GcCCWKpndCiYvBvzNmgxbCksGb7ktzI1Ne1wdDNZT1n')

# Initiate the connection to Twitter Streaming API
twitter_stream = TwitterStream(auth=oauth)

# Get a sample of the public data following through Twitter
iterator = twitter_stream.statuses.sample()

# Print each tweet in the stream to the screen
# Here we set it to stop after getting 1000 tweets.
# You don't have to set it to stop, but can continue running
# the Twitter API to collect data for days or even longer.
api = tweepy.API(auth)
public_tweets = api.home_timeline()

txts = []
wr_file = open('ipjson.json', 'a+')
tweet_count = 1
Example #36
def creationtwitter(request,hash1,hash2,hash3):

	
	h1 = hash1
	h2 = hash2
	h3 = hash3

	try:
	    import json
	except ImportError:
	    import simplejson as json


	print h1,h2,h3

	color = ['#2ecc71','#2980b9','#c0392b','#f1c40f','#2c3e50']

	ckey = 'fibkxu7Ki2PjXQM13EOpqNoB8'
	csecret = 'aLs6U02RHTl3Hx1XOyF20SuYAfUpAKGJvEWpca1s8JWqqEw7Wg'
	atoken = '744960984230440960-va4hCQfFm43kPT3kbb7BVwX7Xtnj7Wa'
	asecret = 'pCRHlvHBLsTgPymvtzVNiXX6sl44dBPTEmBHpfMFycd63'

	oauth = OAuth(atoken , asecret, ckey, csecret)

	twitter_stream = TwitterStream(auth=oauth)

	filterString = h1+","+h2+","+h3
	iterator = twitter_stream.statuses.filter(track=filterString)


	total_tweets = []
	total_tweets.append(1)
	t=[]
	cnt_of_h=[0,0,0]
	#width = 0.8

	fig,ax = plt.subplots()
	h = [h1,h2,h3]
	x_pos = list(range(len(h)))

	def word_in_text(word,text):
		print "in word_in_text"
		word = word.lower()
		text = text.lower()
		match = re.search(word,text)
		if match:
			return True
		return False

	def add_data():
		print "in add_data"
		cnt = 0
		if total_tweets[0] > 60:
			return redirect("index")
		for tweet in iterator:
			try:
				x = json.dumps(tweet)
				j = json.loads(x)
				tweet_text = j['text']
				if word_in_text(h1,tweet_text):
					cnt_of_h[0] += 1
				if word_in_text(h2,tweet_text):
					cnt_of_h[1] += 1
				if word_in_text(h3,tweet_text):
					cnt_of_h[2] += 1
			except:
				continue
			cnt += 1
			if cnt == 5:
				break
	        total_tweets[0] += 5
	#    total_tweets += 5
	#    print total_tweets

	def animate(i):
		print " in animate"
		width = 0.8
		add_data()
		ax.clear()
		ax.set_ylabel('Number of tweets')
		stitle = "Ranking : "+h1+" vs. " + h2 + " vs. " + h3
		ax.set_title(stitle)
		ax.set_xticks([p+ 0.2*width for p in x_pos])
		ax.set_xticklabels(h)
		plt.bar(x_pos, cnt_of_h, width, color='g')

	ani = animation.FuncAnimation(fig,animate,interval=10)

	plt.show()
	return redirect('index')
Example #37
def getSparkSessionInstance(sparkConf):

    if ('sparkSessionSingletonInstance' not in globals()):

        globals()['sparkSessionSingletonInstance'] = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()

    return globals()['sparkSessionSingletonInstance']
    
def consumer():

    #context = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext)

    context = StreamingContext(spark_context, 10)

    dStream = KafkaUtils.createDirectStream(context, ["twitter"], {"metadata.broker.list": "localhost:9092"})

    

    

    #Start Question 1

    dStream.foreachRDD(p1)

    #End Question 1

    

    #Start Question 2

    dStream.foreachRDD(p2)

    #End Question 2



    context.start()

    context.awaitTermination()



def p1(time,rdd):

    #remove field [0]

    rdd=rdd.map(lambda x: json.loads(x[1]))

    records=rdd.collect()    

    records = [element["entities"]["hashtags"] for element in records if "entities" in element] # Select only hashtags part

    records = [x for x in records if x] # Remove empty hashtags

    records = [element[0]["text"] for element in records] # Saving hashtag text in records

    if not records:

        print("Empty List")

    else:

        rdd = spark_context.parallelize(records)

        spark = getSparkSessionInstance(rdd.context.getConf())

        # Convert RDD[String] to RDD[Row] to DataFrame

        hashtagsDataFrame = spark.createDataFrame(rdd.map(lambda x: Row(hashtag=x, time_stamp=time)))

        hashtagsDataFrame.createOrReplaceTempView("hashtags")

        hashtagsDataFrame = spark.sql("select hashtag, count(*) as total, time_stamp from hashtags group by hashtag, time_stamp order by total desc limit 5")

        hashtagsDataFrame.write.mode("append").saveAsTable("hashtag_table")

    

    print(time)



def p2(time,rdd):

    rdd=rdd.map(lambda x: json.loads(x[1]))

    records=rdd.collect()

    

    records = [element["text"] for element in records if "text" in element]

    

    if not records:

        print("Empty List")

    else:
        rdd = spark_context.parallelize(records)
        spark = getSparkSessionInstance(rdd.context.getConf())
        rdd = rdd.map(lambda x: x.split()).flatMap(lambda x: x).map(lambda x: x.lower())
        rdd = rdd.filter(lambda x: x != "a" and x != "and" and x != "an" and x != "are" and x != "as" and x != "at" and x != "be" and x != "by" and x != "for" and x != "from" and x != "has" and x != "he"
        and x != "in" and x != "is" and x != "it" and x != "its" and x != "of" and x != "on" and x != "that" and x != "the" and x != "to" and x != "was" and x != "were" and x != "will" and x != "with")
        keywordDataFrame = spark.createDataFrame(rdd.map(lambda x: Row(keyword=x, time_stamp=time)))
        keywordDataFrame.createOrReplaceTempView("keywords")
        keywordDataFrame = spark.sql("select keyword, count(*) as total, time_stamp from keywords group by keyword, time_stamp order by total desc limit 5")
        keywordDataFrame.write.mode("append").saveAsTable("keywords_table")

if __name__ == "__main__":

    print("Starting to read tweets")

    credentials = read_credentials() 

    oauth = OAuth(credentials['ACCESS_TOKEN'], credentials['ACCESS_SECRET'], credentials['CONSUMER_KEY'], credentials['CONSUMER_SECRET'])

    twitter_stream = TwitterStream(auth=oauth)

    spark_context = SparkContext(appName="First Group Consumer")

    checkpointDirectory = "/checkpoint"

    consumer()
Example #38
import os
import time
import re
from twitter import oauth_dance, read_token_file, TwitterStream, OAuth, Twitter
from local_info import API_key, API_secret

# get accessToken and accessSecret
MY_TWITTER_CREDS = os.path.expanduser(r'.my_app_credentials')
if not os.path.exists(MY_TWITTER_CREDS):
    oauth_dance("100m_tweet_crawler", API_key, API_secret, MY_TWITTER_CREDS)
oauth_token, oauth_secret = read_token_file(MY_TWITTER_CREDS)


twitter = Twitter(auth=OAuth(oauth_token, oauth_secret, API_key, API_secret))
stream = TwitterStream(auth=OAuth(oauth_token, oauth_secret, API_key, API_secret), secure=True)

# NG words
check_chara = ('http', '#', '\\', '【', '】')

# regex
hashtag_pattern = r"[##]([\w一-龠ぁ-んァ-ヴーa-z]+)"
url_pattern = r"^(https?|ftp)://[A-Za-z0-9.-]*$"
r = re.compile(url_pattern)


def trim(text):
    """ replace newline characters with other characters"""
    return text.replace('\r', ' ').replace('\n', ' ')

Example #39
import json

from config import CONSUMER_KEY, CONSUMER_SECRET, TOKEN, TOKEN_SECRET
from twitter import OAuth, TwitterStream

stream = TwitterStream(auth=OAuth(consumer_key=CONSUMER_KEY,
              consumer_secret=CONSUMER_SECRET,
              token=TOKEN,
              token_secret=TOKEN_SECRET))


with open("tweets.txt", "a") as f:
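  # Append each matching status to the file, one JSON document per line.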
  for msg in stream.statuses.filter(track="EleccionesArgentina,YaVoté,YaVote,Legislativas,Diputados,Senadores"):
    print(json.dumps(msg), file=f)
Example #40
def get_data():
    while True:
        try:
            import json
        except ImportError:
            import simplejson as json
        # Import the necessary methods from "twitter" library
        from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

        # Variables that contains the user credentials to access Twitter API
        ACCESS_TOKEN = '703706843978330112-Vtx3ZBhoay3AoYGky1lCzy9bBMQWDRC'
        ACCESS_SECRET = 'g8dHFdnKpi4xXqmyVGlQLPRnnAqVGtIRmEkwRS2hBzV5S'
        CONSUMER_KEY = '96Q7FV1SgqFHObyGRdq88RUZs'
        CONSUMER_SECRET = 'HDZmc1hVFQI6bG9pxdR47zXaKlz0JDDyGfzVa2L5RNpFFKhAF9'

        oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY,
                      CONSUMER_SECRET)

        # Initiate the connection to Twitter Streaming API
        twitter_stream = TwitterStream(auth=oauth)

        # Get a sample of the public data following through Twitter
        iterator = twitter_stream.statuses.filter(locations='-74,40,-73,41')

        # Print each tweet in the stream to the screen
        # Here we set it to stop after getting 1000 tweets.
        # You don't have to set it to stop, but can continue running
        # the Twitter API to collect data for days or even longer.
        MAX = 1000
        tweet_count = MAX

        raw_data_set = []

        for tweet in iterator:
            # Twitter Python Tool wraps the data returned by Twitter
            # as a TwitterDictResponse object.
            # We convert it back to the JSON format to print/score
            data = json.dumps(tweet)  # dumps can convert dic to json
            data = json.loads(data)  #loads can convert json to dic

            if data["entities"]["hashtags"] == []:
                continue
            data = {
                "hashtags": data["entities"]["hashtags"],
                "text": data["text"],
                "screen_name": data["user"]["screen_name"],
                "followers_count": data["user"]["followers_count"],
                "created_at": data["created_at"],
                "coordinates": data["place"]["bounding_box"]["coordinates"],
                "place": data["place"]["full_name"]
            }
            raw_data_set.append(data)
            # The command below will do pretty printing for JSON data, try it out
            # print json.dumps(tweet, indent=4)
            #counter += 1
            #print "get %d tweets" % counter
            tweet_count -= 1
            print tweet_count
            if tweet_count <= 0:
                raw_data_set = json.dumps(raw_data_set)
                output_file = open("static/data/twitter_data.json", "w")
                output_file.write(raw_data_set)
                output_file.close()
                break

    # Process data

        import common_word
        # Import the necessary package to process data in JSON format
        try:
            import json
        except ImportError:
            import simplejson as json

        # if a hashtag in the result , return True and the index
        # else return False and None
        def in_result(hashtag, result):
            for index in range(len(result)):
                if result[index]["hashtag"] == hashtag:
                    return True, index
            return False, None

        def in_keyword_result(word, keyword_result):
            for index in range(len(keyword_result)):
                if keyword_result[index]["keyword"] == word:
                    return True, index
            return False, None

        input_file = open("static/data/twitter_data.json")
        data = input_file.read()
        input_file.close()
        data = json.loads(data)

        result = []  # this result is the hash_tag

        all_words = []
        keyword_result = []

        mentions_result = []
        mentioners = []

        place_list = []
        treemap_result = []

        for tweet in data:
            #get hash_tag result
            for hashtag in tweet["hashtags"]:
                check, index = in_result(hashtag["text"], result)
                if check == True:
                    result[index]["hashtag_num"] += 1
                else:
                    result.append({
                        "hashtag": hashtag["text"],
                        "hashtag_num": 1
                    })

            #get keyword_result
            words = tweet["text"].split()
            all_words += words

            #get mentions
            if tweet["screen_name"] not in mentioners:
                mentions_result.append({
                    "mentioners":
                    tweet["screen_name"],
                    "mentioners_num":
                    tweet["followers_count"],
                    "text":
                    tweet["text"]
                })
                mentioners.append(tweet["screen_name"])

            #get treemap
            if tweet["place"] not in place_list:
                place_list.append(tweet["place"])
                treemap_result.append({"name": tweet["place"], "size": 1})
            else:
                for e in treemap_result:
                    if e["name"] == tweet["place"]:
                        e["size"] += 1
                        break

        #get keyword_result
        for word in all_words:
            if word in common_word.common_word:
                continue
            check, index = in_keyword_result(word, keyword_result)
            if check == True:
                keyword_result[index]["keyword_num"] += 1
            else:
                keyword_result.append({"keyword": word, "keyword_num": 1})

        # sort hashtag_result by count
        for i in range(1, len(result)):
            for index in range(0, len(result) - i):
                if result[index]["hashtag_num"] < result[index +
                                                         1]["hashtag_num"]:
                    temp = result[index]
                    result[index] = result[index + 1]
                    result[index + 1] = temp

        # sort keyword_result
        for i in range(1, len(keyword_result)):
            for index in range(0, len(keyword_result) - i):
                if keyword_result[index]["keyword_num"] < keyword_result[
                        index + 1]["keyword_num"]:
                    temp = keyword_result[index]
                    keyword_result[index] = keyword_result[index + 1]
                    keyword_result[index + 1] = temp

        # sort mentions_result
        for i in range(1, len(mentions_result)):
            for index in range(0, len(mentions_result) - i):
                if mentions_result[index]["mentioners_num"] < mentions_result[
                        index + 1]["mentioners_num"]:
                    temp = mentions_result[index]
                    mentions_result[index] = mentions_result[index + 1]
                    mentions_result[index + 1] = temp

        # sort treemap
        for i in range(1, len(treemap_result)):
            for index in range(0, len(treemap_result) - i):
                if treemap_result[index]["size"] < treemap_result[index +
                                                                  1]["size"]:
                    temp = treemap_result[index]
                    treemap_result[index] = treemap_result[index + 1]
                    treemap_result[index + 1] = temp

        # this variable store the first 17 value of result
        filtered_result = []
        counter_result = 0
        while len(filtered_result) <= 17:
            if len(result[counter_result]["hashtag"]) <= 13:
                filtered_result.append(result[counter_result])
            counter_result += 1

        filtered_keyword_result = []
        counter_result = 0
        while len(filtered_keyword_result) <= 17:
            if len(keyword_result[counter_result]["keyword"]) <= 13:
                filtered_keyword_result.append(keyword_result[counter_result])
            counter_result += 1

        filtered_mentions_result = []
        counter_result = 0

        try:
            while len(filtered_mentions_result) <= 34:
                if len(mentions_result[counter_result]["mentioners"]) <= 13:
                    filtered_mentions_result.append(
                        mentions_result[counter_result])
                counter_result += 1
        except:
            filtered_mentions_result = mentions_result[0:34]

        filtered_result = json.dumps(filtered_result)
        filtered_keyword_result = json.dumps(filtered_keyword_result)
        filtered_mentions_result = json.dumps(filtered_mentions_result)
        treemap_result = json.dumps(treemap_result[0:30])

        output_file1 = open("static/data/hash_tag.json", "w")
        output_file1.write(filtered_result)
        output_file1.close()

        output_file2 = open("static/data/key_word.json", "w")
        output_file2.write(filtered_keyword_result)
        output_file2.close()

        output_file3 = open("static/data/mentions_tweets.json", "w")
        output_file3.write(filtered_mentions_result)
        output_file3.close()

        output_file4 = open("static/data/treemap.json", "w")
        output_file4.write(treemap_result)
        output_file4.close()