def main():
    """Stream our user timeline and reply to every mention with a random quote.

    Runs until the stream hangs up. Requires module-level credentials
    (ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET).
    """
    username = '******'
    # Fix: the original reused the name `quotes` for both the filename and
    # the loaded list, shadowing one with the other.
    quotes_path = 'quotes.txt'
    # Opens quotes file for quotes of anguish (one quote per line),
    # skipping blank lines.
    with open(quotes_path) as f:
        quotes = [line.strip() for line in f if line != "\n"]
    pprint(quotes)

    # REST handle for posting replies; streaming handle for listening.
    auth = OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET)
    t = Twitter(auth=auth)
    ts = TwitterStream(domain='userstream.twitter.com', auth=auth)
    stream = ts.user()

    for tweet in stream:
        #pprint(tweet)
        if 'event' in tweet:
            print('received event %s' % tweet['event'])
        elif 'hangup' in tweet:
            # Twitter closed the stream; stop processing.
            return
        elif 'text' in tweet and tweet['user']['screen_name'] != username:
            # A tweet from someone other than ourselves: treat it as a
            # mention and reply with a random quote.
            print('from @%s: %s' % (tweet['user']['screen_name'], tweet['text']))
            line = random.choice(quotes)
            print('responding with line: %s' % line)
            reply = '@' + tweet['user']['screen_name'] + ' ' + line
            t.statuses.update(status=reply, in_reply_to_status_id=tweet['id'])
def get_twitter_stream():
    """Open a userstream connection and return its event iterator."""
    credentials = OAuth(**TWITTER)
    connection = TwitterStream(
        domain="userstream.twitter.com",
        api_version="1.1",
        auth=credentials,
    )
    return connection.user()
def _follow_userstream(bot):
    # Follow the configured account's Twitter userstream and mirror incoming
    # tweets into the bot's channel via _announce.
    o = service.config_for(bot).oauth._fields
    # block=False makes stream.user() yield None when no message is pending,
    # giving us idle ticks to check the shutdown flag.
    stream = TwitterStream(auth=twitter.OAuth(**o),
                           domain="userstream.twitter.com", block=False)
    for msg in stream.user():
        if msg is not None:
            service.logger.debug(str(msg))
            # Twitter signals start of stream with the "friends" message.
            if 'friends' in msg:
                _announce(bot, "\x02twitter:\x02 This channel is now streaming Twitter in real-time.")
            elif 'text' in msg and 'user' in msg:
                # Remember the most recent status for other commands.
                service.storage_for(bot).last = msg
                url_format = "(https://twitter.com/{0[user][screen_name]}/status/{0[id_str]})"
                if 'retweeted_status' in msg:
                    text = "\x02[@{0[user][screen_name]} RT @{0[retweeted_status][user][screen_name]}]\x02 {0[retweeted_status][text]} " + url_format
                else:
                    text = "\x02[@{0[user][screen_name]}]\x02 {0[text]} " + url_format
                _announce(bot, text.format(msg))
        else:
            # Idle tick: sleep briefly and stop if streaming was deactivated.
            time.sleep(.5)
            if not service.storage_for(bot).active:
                return
def stream(self):
    """Listens to your feed, and updates it whenever someone posts a new tweet."""
    # NOTE: `authenicate` (sic) is defined elsewhere in this module.
    connection = TwitterStream(auth=authenicate(),
                               domain='userstream.twitter.com')
    events = connection.user()
    for event in events:
        self.feed.values = self.update_feed(event)
def open_stream(self):
    """
    Opens an interface to the Twitter API and opens a stream.
    """
    rest_api = Twitter(auth=self.auth)
    streaming_api = TwitterStream(domain='userstream.twitter.com',
                                  auth=self.auth)
    self.twitter = rest_api
    self.stream = streaming_api.user()
    self.iterator = iter(self.stream)
def GetTimeLineSteam(self, compositionRunner=None, block=True):
    """Consume the user stream forever, dispatching each event to ProcessTweet.

    On any error the connection is dropped, we wait a minute, and reconnect.
    """
    while True:
        try:
            credentials = OAuth(oauth_token, oauth_secret,
                                CONSUMER_KEY, CONSUMER_SECRET)
            user_stream = TwitterStream(domain="userstream.twitter.com",
                                        api_version="1.1",
                                        auth=credentials,
                                        block=block)
            for event in user_stream.user():
                self.ProcessTweet(event, compositionRunner)
        except Exception:
            # Back off before re-establishing the connection.
            time.sleep(60)
def _follow_userstream(ctx):
    """Follow the account's userstream, announcing tweets to the channel.

    Reconnects with an escalating backoff on stream loss or errors; the
    backoff resets after a successful (re)connection.

    Fix: the original indexed ``reconnect_seconds[reconnect_tries]`` without
    bounds, raising IndexError after the fourth consecutive failure. The
    index is now clamped to the last (longest) delay.
    """
    o = ctx.config.oauth._fields
    # block=False makes stream.user() yield None when idle.
    stream = TwitterStream(auth=twitter.OAuth(**o),
                           domain="userstream.twitter.com", block=False)
    reconnect_seconds = [2, 10, 60, 300]
    reconnect_tries = 0
    while ctx.storage.active:
        # Clamp so repeated failures keep using the longest delay instead of
        # running off the end of the table.
        delay = reconnect_seconds[min(reconnect_tries,
                                      len(reconnect_seconds) - 1)]
        try:
            for msg in stream.user():
                if msg is not None:
                    service.logger.debug(str(msg))
                    # Twitter signals start of stream with the "friends" message.
                    if 'friends' in msg:
                        _announce(ctx, "\x02twitter:\x02 This channel is now streaming Twitter in real-time.")
                        reconnect_tries = 0
                    elif 'text' in msg and 'user' in msg:
                        memorize_id(ctx, msg["id_str"])
                        ctx.storage.last = msg
                        url_format = "(https://twitter.com/{0[user][screen_name]}/status/{0[id_str]})"
                        if 'retweeted_status' in msg:
                            text = "\x02[@{0[user][screen_name]} RT @{0[retweeted_status][user][screen_name]}]\x02 {0[retweeted_status][text]} " + url_format
                        else:
                            text = "\x02[@{0[user][screen_name]}]\x02 {0[text]} " + url_format
                        _announce(ctx, text.format(msg))
                else:
                    # Idle tick: check for deactivation.
                    time.sleep(.5)
                    if not ctx.storage.active:
                        return
            _announce(ctx, "\x02twitter:\x02 Twitter userstream connection lost! Waiting {time} seconds to reconnect.".format(
                time=delay
            ))
        except Exception as e:
            _announce(ctx, "\x02twitter:\x02 Exception thrown while following userstream! Waiting {time} seconds to reconnect.".format(
                time=delay
            ))
            _announce(ctx, "↳ {name}: {info}".format(
                name=e.__class__.__name__,
                info=str(e)
            ))
        time.sleep(delay)
        reconnect_tries += 1
def init():
    # Stream geo-filtered tweets from Twitter into a Kafka topic.
    # Build the OAuth client from the [oauth] section of the config.
    oauth_client = Oauth(config.get('oauth', 'consumer_key'),
                         config.get('oauth', 'consumer_secret'),
                         config.get('oauth', 'request_token_url'),
                         config.get('oauth', 'access_token_url'),
                         config.get('oauth', 'authorize_url'),
                         version=config.get('oauth', 'version'))
    # Streaming-filter request; the bounding box roughly covers North America.
    request = Request(url=config.get('twitter', 'streaming_filter_url'),
                      method="POST",
                      is_streaming=True,
                      headers={'Accept-Encoding': 'deflate, gzip '},
                      payload={'locations': '-118.39,30.41,-59.61,49.46'},
                      token=token)
    max_stream = int(config.get('twitter', 'max_stream_responses'))
    topic = config.get('kafka', 'topic')
    max_skip_invalid_responses = config.getint('twitter', 'max_skip_invalid_response')
    skip_invalid_responses = config.getboolean('twitter', 'skip_invalid')
    # NOTE(review): `async` is a reserved word in Python 3.7+; this function
    # is Python 2 code (see the `print e` below).
    producer = KeyedProducer(kafka_client, async=True)
    twitter = TwitterStream(oauth_client, json)
    tweets = twitter.get_tweets(request)
    # Starts here.
    try:
        # A negative max_stream means "no limit".
        if max_stream < 0:
            send_unlimited_messages(tweets, producer, topic)
        else:
            send_limited_messages(max_stream, tweets, producer, topic,
                                  skip_invalid_responses,
                                  max_skip_invalid_responses)
    except Exception as e:
        print e
    finally:
        # Always flush and release Kafka resources.
        producer.stop()
        kafka_client.close()
def main():
    """Reply to every mention of our account with a randomly chosen lyric."""
    # Read the lyrics file into a list, dropping blank lines.
    with open(LYRICS) as lyrics_file:
        lyrics = [line.strip() for line in lyrics_file if line != "\n"]

    # Show what we loaded (diagnostics).
    pprint(lyrics)

    # Build both API handles: REST for posting, streaming for listening.
    auth = OAuth(ACCESS_TOKEN, ACCESS_TOKEN_SECRET, API_KEY, API_SECRET)
    t = Twitter(auth=auth)
    ts = TwitterStream(domain='userstream.twitter.com', auth=auth)

    # Walk every event arriving on our own user stream.
    for tweet in ts.user():
        # Print it out nicely, so we can see what happens.
        pprint(tweet)

        if 'event' in tweet:
            print('received event %s' % tweet['event'])
            continue

        # 'text' means this is a tweet; if the screen name isn't our own,
        # someone is tweeting at us.
        if 'text' not in tweet or tweet['user']['screen_name'] == USERNAME:
            continue

        print('from @%s: %s' % (tweet['user']['screen_name'], tweet['text']))

        # Pick a lyric, compose a reply, and send it!
        line = random.choice(lyrics)
        print('responding with line: %s' % line)
        reply = '@' + tweet['user']['screen_name'] + ' ' + line
        t.statuses.update(status=reply, in_reply_to_status_id=tweet['id'])
def _get_iterator(self):
    """Returns twitter stream iterator object."""
    # Fix: the original wrapped this call in `try/except Exception as e:
    # raise e`, which only re-raised the caught exception and added nothing.
    # Errors now propagate naturally.
    return TwitterStream(auth=self.oauth).statuses.sample()
def _get_stream(self):
    """Returns twitter stream object."""
    # Fix: removed the no-op `try/except Exception as e: raise e` wrapper;
    # it only re-raised the exception it caught.
    return TwitterStream(auth=self.oauth)
def _setup_twitter_stream(self):
    """Build the streaming-API handle from our OAuth credentials."""
    credentials = self._get_oauth()
    self._twitter_stream = TwitterStream(auth=credentials)
# (Fragment: continues a try/OAuth expression begun above this chunk.)
        os.environ['CONSUMER_KEY'],
        os.environ['CONSUMER_SECRET']
    )
    SHORTE_ST_TOKEN = os.environ['SHORTE_ST_TOKEN']
except KeyError:
    # For local tests.
    # NOTE(review): exec'ing a credentials file runs arbitrary code --
    # acceptable only if the file is developer-owned; confirm.
    with open('credentials', 'r') as secret:
        exec(secret.read())
    oauth = OAuth(
        ACCESS_TOKEN,
        ACCESS_SECRET,
        CONSUMER_KEY,
        CONSUMER_SECRET
    )

t = Twitter(auth=oauth)
# For uploading photos.
t_upload = Twitter(auth=oauth, domain="upload.twitter.com")
ts = TwitterStream(auth=oauth)
tu = TwitterStream(auth=oauth, domain="userstream.twitter.com")

# Following are some useful wrappers for Twitter-related functionalities.


def pf(sn):
    """
    Attempts to print the followers of a user, provided their screen name.
    """
    # Page through the follower list; Twitter returns cursor 0 on the last page.
    cursor = -1
    next_cursor = 1
    while cursor != 0:
        followers = t.followers.list(screen_name=sn, cursor=cursor)
        # (Function body truncated in this chunk.)
# Twitter user user = "******" if __name__ == '__main__': try: oauth = OAuth(access_token, access_token_secret, consumer_key, consumer_secret) # Connect to Twitter Streaming API #twitter_stream = TwitterStream(auth = oauth) # UNCOMMENT when ready to test twitter_stream = TwitterStream(auth=oauth, secure=True) # Get an iterator on the public data following through Twitter #tweet_iterator = twitter_stream.statuses.filter(locations='-180,-90,180,90') #print(json.loads(twitter_stream)) # UNCOMMENT when ready to test tweets = twitter_stream.statuses.filter(track=user) for tweet in tweets: #print json.dumps(tweet, indent=2, sort_keys=True) #entities = tweet.get("entities") entities = tweet.get("extended_entities") print json.dumps(entities, indent=2, sort_keys=True) if (entities): print json.dumps(entities, indent=2, sort_keys=True) media_list = entities.get("media") if (media_list):
def get_iterable():
    """Return an iterator over Twitter's public sample stream."""
    credentials = OAuth(ACCESS_TOKEN, ACCESS_SECRET,
                        CONSUMER_KEY, CONSUMER_SECRET)
    sample_stream = TwitterStream(auth=credentials)
    return sample_stream.statuses.sample()
def getData():
    #getData() - grab data from Twitter API and parsing the information in order to insert into database
    conn = psycopg2.connect(
        "dbname='kkosyka_db' host='localhost' user='******' password='******'"
    )  #(database information - database, host, user, password)
    cur = conn.cursor()
    # Import the necessary methods from "twitter" library
    from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
    # Variables that contains the user credentials to access Twitter API
    # Access values under Kalynn Kosyka, one may need to change for their projects
    # NOTE(review): live credentials are hard-coded in source control; they
    # should be moved to config/environment and the exposed keys revoked.
    ACCESS_TOKEN = '28930526-ttro9V7TUvuUfXMe4e3OBMlU38MuKn9ISLUwqMvP9'
    ACCESS_SECRET = 'dI0t4RRSJU53FciGw1jYfApDkx1x3znrWwH9zSdfetQjh'
    CONSUMER_KEY = '3KUdtFeceeLB3rs3pJDe4fbeM'
    CONSUMER_SECRET = 'vPps0BgF2Vm0UZXKdi67URWUnIl5ygk1m5KLRHbXVWwGHCoej1'
    oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    # Initiate the connection to Twitter Streaming API
    twitter_stream = TwitterStream(auth=oauth)
    twitter = Twitter(auth=oauth)
    #Creates new file that will contain the most n-number of recent tweets that contain a particular hashtag
    newFile = writeFile
    fileName = open(newFile, "a")
    # Getting data from Twitter and going through the data
    json_input = twitter.search.tweets(
        q='#sunset', result_type='recent', lang='en',
        count=100)  #change q value for hashtag query
    json_input = json.dumps(json_input)
    s = sched.scheduler(time.time, time.sleep)
    try:
        decoded = json.loads(json_input)
        print len(decoded["statuses"])
        numCoor = 0
        for x in range(0, len(decoded["statuses"])):
            coor = False
            # Sanitize tweet text for CSV output: commas -> slashes,
            # newlines -> <br/>.
            text = ((decoded["statuses"][x]["text"]).encode('ascii', 'ignore'))
            text = text.replace(",", "/")
            text = str(text.replace("\n", "<br/>"))
            coorX = "null"
            coorY = "null"
            coordinates = str(decoded["statuses"][x]["coordinates"]).encode(
                'ascii', 'ignore')
            if not len(coordinates) <= 4:  #if there are coordinates
                coor = True
                numCoor += 1
                coordinates = str(decoded["statuses"][x]["coordinates"]
                                  ["coordinates"]).encode('ascii', 'ignore')
                coordinates = coordinates.split(",")
                # Strip the surrounding brackets from the "[x, y]" string.
                coorX = (coordinates[0][1:len(coordinates[0])])
                coorY = (coordinates[1][0:len(coordinates[1]) - 1])
            if not coor:  # filtering so we only get tweets with coordinates, otherwise we skip them
                continue
            else:
                screenName = (
                    decoded["statuses"][x]["user"]["screen_name"]).encode(
                        'ascii', 'ignore')
                createdAt = (decoded["statuses"][x]["created_at"]).encode(
                    'ascii', 'ignore')
                hashtags = []
                for i in range(
                        0, len(decoded["statuses"][x]["entities"]["hashtags"])):
                    hashtags.append(decoded["statuses"][x]["entities"]
                                    ["hashtags"][i]["text"])
                print "num coor:\n" + str(numCoor)
                print "---------------------"
                #Write data in CSV format - text, coorX, coorY, username, created at, hashtag(s)
                #if one wants to save the data into a csv file, uncomment line below
                #fileName.write(text + "," + coorX +","+ coorY + ","+ screenName +"," + createdAt + ",")
                hashtagsHolder = ""
                for j in range(0, len(hashtags)):
                    fileName.write("#" + hashtags[j] + " ")
                    hashtagsHolder = hashtagsHolder + "#" + hashtags[j] + " "
                printPretty(text, coorX, coorY, screenName, createdAt,
                            hashtagsHolder
                            )  #print data into console, comment out if not needed
                #Insert data into database - assuming database already exists
                #database - text(text), xcoor(numeric), ycoor(numeric), username(text), created(text), hashtags(text), twitterGeom (geometry - SRID 4326)
                # Dedupe: only insert rows not already present.
                cur.execute(
                    """SELECT EXISTS(SELECT 1 FROM public."TwitterDataSample" WHERE text=%s AND xcoor=%s AND ycoor=%s AND username=%s AND created=%s AND hashtags=%s ) """,
                    (text, coorX, coorY, screenName, createdAt, hashtagsHolder))
                if cur.fetchone()[0] == False:
                    cur.execute(
                        """INSERT INTO public."TwitterDataSample"(text, xcoor, ycoor, username, created, hashtags) VALUES (%s,%s,%s, %s, %s, %s)""",
                        (text, coorX, coorY, screenName, createdAt,
                         hashtagsHolder))
                    #Using and converting coordinate values into geometry value with SRID 4326
                    cur.execute(
                        """UPDATE public."TwitterDataSample" SET "twitterGeom" = ST_GeomFromText('POINT('||xcoor::text||' 
'||ycoor::text||')', 4326)""" )
                    conn.commit()
    except (ValueError, KeyError, TypeError):
        fileName.write("JSON format error")
    fileName.close()
    #print stars, used for pretty printing and dividing info in console - can be commented out
    print "*********************************************************"
    s.enter(3600, 1, getData, ())  #run every x sec, 3600s = 1hr
    s.run()
import json
#import simplejson as json
import twitter
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

# NOTE(review): credentials are hard-coded (partially redacted here); they
# belong in configuration or environment variables.
ACCESS_TOKEN = '1355xxxxx'
ACCESS_SECRET = 'xxxxxxxx'
CONSUMER_KEY = '1xxxxxxxNR'
CONSUMER_SECRET = 'xxxxLLU'
oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

twitter_data = TwitterStream(auth=oauth)
twitter_rest_data = Twitter(auth=oauth)
getT = twitter_rest_data.search.tweets(q='#Hillary')
iterator = twitter_data.statuses.sample()

# Dump the next 10 sampled tweets to out.txt as pretty-printed JSON.
# Fixes: don't shadow the builtin `file`; use a context manager so the
# handle is closed even if the stream raises mid-loop.
tweet_count = 10
with open('out.txt', 'w') as out_file:
    for tweet in iterator:
        tweet_count -= 1
        out_file.write(json.dumps(tweet, indent=4))
        if tweet_count <= 0:
            break
if __name__ == "__main__":
    # Get credentials
    credentials = get_credentials()

    output_file = open("output_tweets.json", "a")

    # Get authentication
    auth = OAuth(credentials["ACCESS_TOKEN"], credentials["ACCESS_SECRET"],
                 credentials["CONSUMER_KEY"], credentials["CONSUMER_SECRET"])

    print("Start getting tweets")

    # Set up twitter stream -- track common disease/symptom terms in both
    # capitalizations.
    keywords = [
        'Flu', 'Zika', 'Ebola', 'Diarrhea', 'Headache', 'Measles',
        'flu', 'zika', 'ebola', 'diarrhea', 'headache', 'measles'
    ]

    while True:
        try:
            stream = TwitterStream(auth=auth, secure=True)
            tweets = stream.statuses.filter(track=keywords)
            for tweet in tweets:
                # Append each tweet as one JSON line.
                output_file.write(json.dumps(tweet) + "\n")
                # Display some tweet information
                print(json.dumps(tweet))
                print("ID: " + str(tweet["id"]))
                # Fix: this line was corrupted (redacted) in the source;
                # reconstructed to match the ID/Text lines around it.
                print("User: " + str(tweet["user"]["screen_name"]))
                print("Text: " + str(tweet["text"]))
        except TwitterError as e:
            # If limit is reached wait 5 minutes
            print(e)
            time.sleep(300)
        except KeyboardInterrupt:
            print("Program killed")
            output_file.close()
# Padding (degrees) applied around place bounding boxes.
loc_pad = 0.01

auth = OAuth(
    creds["access_token"],
    creds["access_token_secret"],
    creds["consumer_key"],
    creds["consumer_secret"]
)
twitter = Twitter(auth=auth)
# Separate client for media uploads.
t_up = Twitter(domain='upload.twitter.com', auth=auth)
twitter_stream = TwitterStream(auth=auth,
                               domain="userstream.twitter.com"
                               )

for tweet in twitter_stream.user():
    # print msg
    # with open("test.json", "rb") as testf:
    #     tweet = json.load(testf)
    # print json.dumps(tweet,indent=2)
    # Only geo-tagged tweets (carrying a "place") are of interest.
    if "place" in tweet and tweet["place"] is not None:
        print "Tweet From", tweet["user"]["screen_name"], tweet["place"]
        bb = tweet["place"]["bounding_box"]
        if bb["type"] == "Polygon":
            # Seed min/max with sentinel values before scanning vertices.
            min_lat = 90
            max_lat = -90
            # (Loop body truncated in this chunk.)
def getData():
    # Fetch recent '#sunset' tweets via the search API, append the
    # geo-tagged ones to a CSV file, then reschedule itself.
    global roundCount
    # Import the necessary methods from "twitter" library
    from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
    # Variables that contains the user credentials to access Twitter API
    # NOTE(review): live credentials are hard-coded in source control; move
    # them to config/environment and revoke the exposed keys.
    ACCESS_TOKEN = '28930526-ttro9V7TUvuUfXMe4e3OBMlU38MuKn9ISLUwqMvP9'
    ACCESS_SECRET = 'dI0t4RRSJU53FciGw1jYfApDkx1x3znrWwH9zSdfetQjh'
    CONSUMER_KEY = '3KUdtFeceeLB3rs3pJDe4fbeM'
    CONSUMER_SECRET = 'vPps0BgF2Vm0UZXKdi67URWUnIl5ygk1m5KLRHbXVWwGHCoej1'
    oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    # Initiate the connection to Twitter Streaming API
    twitter_stream = TwitterStream(auth=oauth)
    twitter = Twitter(auth=oauth)
    #Creates new file that will contain the most 30 recent tweets that contain
    #smithcollege
    newFile = writeFile
    fileName = open(newFile, "a")  #"a"
    #fileName.write("text, coorX, coorY, username, created at, hashtag(s) \n")
    """
    if (roundCount == 0):
        print "yes"
        numCount = 100
    else:
        numCount=5
        print "no"
    print numCount
    """
    json_input = twitter.search.tweets(q='#sunset', result_type='recent',
                                       lang='en', count=100)
    json_input = json.dumps(json_input)
    s = sched.scheduler(time.time, time.sleep)
    try:
        decoded = json.loads(json_input)
        print len(decoded["statuses"])
        numCoor = 0
        for x in range(0, len(decoded["statuses"])):
            coor = False
            # Sanitize tweet text for CSV: commas -> slashes, newlines -> <br/>.
            text = ((decoded["statuses"][x]["text"]).encode('ascii', 'ignore'))
            text = text.replace(",", "/")
            text = text.replace("\n", "<br/>")
            coorX = "null"
            coorY = "null"
            coordinates = str(decoded["statuses"][x]["coordinates"]).encode(
                'ascii', 'ignore')
            if not len(coordinates) <= 4:  #if there are coordinates
                coor = True
                numCoor += 1
                coordinates = str(decoded["statuses"][x]["coordinates"]
                                  ["coordinates"]).encode('ascii', 'ignore')
                coordinates = coordinates.split(",")
                # Strip the surrounding brackets from the "[x, y]" string.
                coorX = coordinates[0][1:len(coordinates[0])]
                coorY = coordinates[1][0:len(coordinates[1]) - 1]
            if not coor:  #filtering so we only get tweets with coordinates, otherwise we skip them
                continue
            else:
                screenName = (
                    decoded["statuses"][x]["user"]["screen_name"]).encode(
                        'ascii', 'ignore')
                createdAt = (decoded["statuses"][x]["created_at"]).encode(
                    'ascii', 'ignore')
                hashtags = []
                for i in range(
                        0, len(decoded["statuses"][x]["entities"]["hashtags"])):
                    hashtags.append(decoded["statuses"][x]["entities"]
                                    ["hashtags"][i]["text"])
                print "num coor:\n" + str(numCoor)
                print "---------------------"
                #write file in CSV format
                fileName.write(text + "," + coorX + "," + coorY + "," +
                               screenName + "," + createdAt + ",")
                for j in range(0, len(hashtags)):
                    fileName.write("#" + hashtags[j] + " ")
                fileName.write("\n")
    except (ValueError, KeyError, TypeError):
        fileName.write("JSON format error")
    fileName.close()
    print "*********************************************************"
    roundCount += 1
    # Re-run this function on the scheduler every half hour.
    s.enter(1800, 1, getData, ())  #1800
    s.run()
try:
    import json
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

# NOTE(review): live credentials are hard-coded in source control; move them
# to config/environment and revoke the exposed keys.
access_token = "841163774983884800-7MKXWJE5AZrfEGZ7GxupUl7iL35a7VE"
access_secret = "VlinyeyHnEBHTUwZ9RCCLCaUBO9C7F99NddEgRfbVxovf"
consumer_key = "QiUIFH5wLD1xoTy2KnAVS8J6y"
consumer_secret = "cw7gtMfHxFrxuHr1zzPlxiSpTt80TXHXxCWbrcPKUg9RKRSgY8"

oauth = OAuth(access_token, access_secret, consumer_key, consumer_secret)

# Fix: statuses/filter is a public-stream endpoint and is not served by
# userstream.twitter.com, so the domain override is dropped and the
# library's default stream host is used.
twitter_stream = TwitterStream(auth=oauth)
iterator = twitter_stream.statuses.filter(track="walmart")

# Print up to 10000 matching tweets, one JSON document per line, then stop.
tweet_count = 10000
for tweet in iterator:
    tweet_count -= 1
    print(json.dumps(tweet))
    if tweet_count <= 0:
        break
def create_tweet_iterator(token, token_secret, consumer_key, consumer_secret,
                          bounding_box):
    """Return an iterator over tweets geo-filtered to *bounding_box*."""
    credentials = OAuth(token, token_secret, consumer_key, consumer_secret)
    stream = TwitterStream(auth=credentials)
    return stream.statuses.filter(locations=bounding_box)
def handle(self, *args, **options):
    # Stream tweets matching the stored keywords and persist "interesting"
    # ones (engaged tweets from heavyweight authors, or carrying links).
    # Refuse to run with nothing to track.
    if not BaseKeyword.objects.count():
        raise CommandError('No keywords found!')
    keywords = ','.join([k['term'] for k in BaseKeyword.objects.values('term')])
    twitter_stream = TwitterStream(auth=OAuth(
        token=settings.TWITTER_TOKEN,
        token_secret=settings.TWITTER_TOKEN_SECRET,
        consumer_key=settings.TWITTER_CONSUMER_KEY,
        consumer_secret=settings.TWITTER_CONSUMER_SECRET)
    )
    stream = twitter_stream.statuses.filter(track=keywords)
    for tweet in stream:
        if 'retweeted_status' in tweet:
            # If this is a retweet of an earlier tweet, then we want to check only the original.
            tweet = tweet['retweeted_status']
        user = tweet['user']
        # Unsaved Account built from the tweet author's profile fields.
        author = Account(
            twitter_id=user['id_str'],
            screen_name=user['screen_name'],
            name=user['name'],
            url=user['url'] if 'url' in user else None,
            status_count=user['statuses_count'] if 'statuses_count' in user else 0,
            follower_count=user['followers_count'] if 'followers_count' in user else 0,
            following_count=user['friends_count'] if 'friends_count' in user else 0,
            listed_in_count=user['listed_count'] if 'listed_count' in user else 0,
            is_verified=user['verified'] if 'verified' in user else False
        )
        # Keep only tweets with both retweets and favorites, from a weighty
        # author or containing URLs.
        if (tweet['retweet_count'] and tweet['favorite_count'] and
                (author.get_weight() > 1000 or tweet['entities']['urls'])):
            # Some debug prints, visual confirmation :)
            print '-=' * 45
            print tweet['text'].encode('ascii', 'ignore')
            print tweet['created_at'], tweet['favorite_count'], tweet['retweet_count'], author.get_weight()
            try:
                author.save()
            except IntegrityError:
                # Author already exists; load the stored row instead.
                author = Account.objects.get(twitter_id=user['id_str'])
            mentions = list()
            if tweet['entities']['user_mentions']:
                # NOTE(review): this loop rebinds `user`, shadowing the
                # author's profile dict used above.
                for user in tweet['entities']['user_mentions']:
                    try:
                        (mention, created) = Account.objects.get_or_create(
                            twitter_id=user['id_str'],
                            screen_name=user['screen_name'],
                            name=user['name'],
                            url=user['url'] if 'url' in user else None,
                            status_count=user['statuses_count'] if 'statuses_count' in user else 0,
                            follower_count=user['followers_count'] if 'followers_count' in user else 0,
                            following_count=user['friends_count'] if 'friends_count' in user else 0,
                            listed_in_count=user['listed_count'] if 'listed_count' in user else 0,
                            is_verified=user['verified'] if 'verified' in user else False
                        )
                    except IntegrityError:
                        mention = Account.objects.get(twitter_id=user['id_str'])
                    mentions.append(mention)
            # try:
            tw, created = Tweet.objects.get_or_create(
                tweet_id=tweet['id_str'],
                defaults=dict(
                    author=author,
                    text=tweet['text'],
                    created_at=parser.parse(tweet['created_at']),
                    favorite_count=tweet['favorite_count'],
                    retweet_count=tweet['retweet_count']
                )
            )
            # except IntegrityError:
            #     tw = Tweet.objects.get(tweet_id=tweet['id_str'])
            for user in mentions:
                tw.mentions.add(user)
        else:
            continue
    # (Fragment: closes an app-only OAuth2 bearer-token request begun above
    # this chunk.)
    auth=OAuth2(conf['twitter']['key'],
                conf['twitter']['secret'])).oauth2.token(
                    grant_type="client_credentials"))['access_token'])
    # Three clients: app-auth search, user-auth REST, and the streaming API
    # (non-blocking with a 10s timeout).
    SearchConn = Twitter(domain="api.twitter.com", api_version="1.1",
                         format="json", auth=oauth2, secure=True)
    ResConn = Twitter(domain="api.twitter.com", api_version="1.1",
                      format="json", auth=oauth, secure=True)
    StreamConn = TwitterStream(domain="stream.twitter.com", api_version="1.1",
                               auth=oauth, secure=True, block=False,
                               timeout=10)
except Exception as e:
    log(
        'ERROR',
        'Could not initiate connections to Twitter API: %s %s' % (type(e), e))
    sys.exit(1)

try:
    locale = timezone(conf['timezone'])
except:
    # Unknown timezone: dump the list of valid names to help the user.
    log('ERROR', "\t".join(all_timezones) + "\n\n")
    log(
        'ERROR',
        'Unknown timezone set in config.json: %s. Please choose one among the above ones.'
        % conf['timezone'])
def preprocessing():
    # Collect tweets matching the user's query, clean their text, and build
    # the labelling UI (one row per tweet, with a positive/negative checkbox).
    global lista
    query = entry_1.get()
    if len(entry_1.get()) == 0:
        tkMessageBox.showinfo("Ooops", "Please enter a query!")
        return
    count = entry_2.get()
    if len(entry_2.get()) == 0:
        tkMessageBox.showinfo("Ooops", "Please enter a number!")
        return
    try:
        tweet_count = int(count)
    except:
        tkMessageBox.showinfo("Ooops", "Please enter a number!")
        return
    # Remember the requested count; tweet_count is decremented below.
    double = tweet_count
    print('Saving tweets')
    szoveg = ""
    labellist = [Variable() for i in range(double)]
    # Initiate the connection to Twitter Streaming API
    twitter_stream = TwitterStream(auth=oauth)
    iterator = twitter_stream.statuses.filter(track=query, languages='en')
    for tweet in iterator:
        print("-----------------------------------")
        data = json.loads(json.dumps(tweet))
        text = data["text"].encode('utf-8')
        # Clean-up pipeline: strip backslashes, URLs, mentions and retweet
        # markers; drop non-letter characters; turn punctuation into spaces;
        # collapse runs of repeated characters; lowercase.
        re00 = re.sub(r'\\', '', text)
        re001 = re.sub(r'http\S+', '', re00)
        re002 = re.sub(r'www.\S+', '', re001)
        p.set_options(p.OPT.URL, p.OPT.EMOJI)
        cleantweet = p.clean(re002)
        re1 = re.sub(r'@\S+', '', cleantweet)
        re2 = re.sub(r'RT', '', re1)  #retweeted
        re3 = re.sub(r'[^a-zA-Z,.?!;: ]', '', re2)
        re4 = re.sub(r'[,.?!:;]', ' ', re3)
        re5 = re.sub(r'(.)\1+', r'\1\1', re4).lower()
        # Keep only non-empty cleaned tweets.
        if re4 or not re5.isspace():
            lista.append(re5)
            tweet_count -= 1
            szoveg = szoveg + str(re5) + '\n'
        if tweet_count <= 0:
            break
    #----------------tweets
    #print(szoveg)
    frame.grid(columnspan=4)
    label1 = Label(frame, text="tweets")
    label1.grid(row=0, column=0)
    j = 0
    # One label per collected tweet.
    for i in labellist:
        l = Label(frame, text=lista[j])
        l.grid(row=j + 1, column=0)
        j = j + 1
    #------------checkbuttons
    label2 = Label(frame, text="Check if positive ")
    label2.grid(row=0, column=1)
    checklist = [IntVar() for i in range(double)]
    for i in checklist:
        i.set(0)
    #print(double)
    j = 1
    for i in checklist:
        c = Checkbutton(frame, variable=i, command=checked)
        c.grid(row=j, column=1)
        j = j + 1
    buttonProcess.config(state=DISABLED)
    #-----------algorithms
    buttonBayes = Button(frame, text='Naive Bayes', command=NaiveBayes)
    buttonBayes.grid(row=0, column=2)
    buttonSvm = Button(frame, text='Svm', command=Svm)
    buttonSvm.grid(row=0, column=3)
    buttonKnn = Button(frame, text='Knn', command=Knn)
    buttonKnn.grid(row=0, column=4)
def realTweets():
    """Listen for trigger tweets and animate the jumper lights accordingly.

    Fixes over the original: the bare ``except:`` (which also swallowed
    KeyboardInterrupt) is narrowed to ``except Exception``, and the
    retry-by-recursion (which grew the call stack on every reconnect) is
    replaced with a loop.
    """
    global lc
    while True:
        try:
            stream = TwitterStream(auth=oauth)
            tweets = stream.statuses.filter(track=TRIGGER_TEXT)
            #twitterInterface = Twitter(auth=oauth)
            showReady()
            print("Ready! Listening for tweets...")
            for tweet in tweets:
                print("Trigger tweet received: " + tweet['text'] + " - " +
                      tweet['user']['name'])
                messageBack = ("Hey @" + tweet['user']['screen_name'] +
                               "! Thanks for your tweet. You made my ")
                # Lower-case once; each phrase maps to a light routine.
                lowered = tweet['text'].lower()
                if "jumper flash" in lowered:
                    messageBack = messageBack + "whole jumper flash! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.flashAllTogether(ACTIVITY_TIME)
                elif "jumper chase" in lowered:
                    messageBack = messageBack + "lights flash up and down! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.flashAllSequence(ACTIVITY_TIME)
                elif "balls flash" in lowered:
                    messageBack = messageBack + "bauble lights flash! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.flashBalls(ACTIVITY_TIME)
                elif "star flash" in lowered:
                    messageBack = messageBack + "star lights flash! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.flashStar(ACTIVITY_TIME)
                elif "tree flash" in lowered:
                    messageBack = messageBack + "tree flash! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.flashTree(ACTIVITY_TIME)
                elif "lights flash" in lowered:
                    messageBack = messageBack + "tree flash! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.flashTree(ACTIVITY_TIME)
                elif "jumper on" in lowered:
                    messageBack = messageBack + "whole jumper light up! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.on(ACTIVITY_TIME)
                elif "balls on" in lowered:
                    messageBack = messageBack + "bauble lights light up! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.ballsOn()
                    sleep(ACTIVITY_TIME)
                    lc.ballsOff()
                elif "lights on" in lowered:
                    messageBack = messageBack + "tree lights turn on! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.treeOn()
                    sleep(ACTIVITY_TIME)
                    lc.treeOff()
                elif "tree on" in lowered:
                    messageBack = messageBack + "tree lights turn on! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.treeOn()
                    sleep(ACTIVITY_TIME)
                    lc.treeOff()
                elif "star on" in lowered:
                    messageBack = messageBack + "star light up! #ChristmasJumper #Shiplake7 #Creative"
                    # twitterInterface.statuses.update(status=messageBack)
                    lc.starOn()
                    sleep(ACTIVITY_TIME)
                    lc.starOff()
        except Exception:
            showError()
            print("Error connecting to Twitter. Trying again in 10 seconds.")
            sleep(10)
from twitter import oauth_dance, read_token_file, TwitterStream, OAuth
import os
import matplotlib.pyplot as plt
import matplotlib.style as style
import pandas as pd

# NOTE(review): application credentials are hard-coded in source control;
# move them to config/environment and revoke the exposed keys.
CONSUMER_KEY = 'uqiCSPB5CYtMXYN4wV2LUkwiL'
CONSUMER_SECRET = 'q3sSeTZNKKAc4eladDvyVlyDctL2066ht36wpwjYjpByLgWKQJ'

# Run the interactive OAuth dance once and cache the token on disk.
MY_TWITTER_CREDS = os.path.expanduser('~/.my_app_credentials')
if not os.path.exists(MY_TWITTER_CREDS):
    oauth_dance("SentimentVisualizer", CONSUMER_KEY, CONSUMER_SECRET,
                MY_TWITTER_CREDS)
oauth_token, oauth_secret = read_token_file(MY_TWITTER_CREDS)

twitter_stream = TwitterStream(
    auth=OAuth(oauth_token, oauth_secret, CONSUMER_KEY, CONSUMER_SECRET))
# NOTE(review): variable name is misspelled ("iterartor"); kept as-is since
# later code (outside this chunk) may reference it.
iterartor = twitter_stream.statuses.filter(track='weiner')

# English stop words used to filter tokens.
stop_list = [
    "a", "about", "above", "after", "again", "against", "all", "am", "an",
    "and", "any", "are", "aren't", "as", "at", "be", "because", "been",
    "before", "being", "below", "between", "both", "but", "by", "can't",
    "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't",
    "doing", "don't", "down", "during", "each", "few", "for", "from",
    "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having",
    "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself",
    "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm",
    "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself",
    "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor",
    "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our",
    "ours",
    # (List truncated in this chunk.)
try:
    import json
except ImportError:
    import simplejson as json

from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream

ACCESS_TOKEN = '**************************************************'
ACCESS_SECRET = '*****************************************'
CONSUMER_KEY = '*********************'
CONSUMER_SECRET = '**************************************************'

oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

# Fix: statuses/filter is a public-stream endpoint; it is not served by
# userstream.twitter.com, so the domain override is removed and the default
# stream host is used. (Variable name kept for compatibility.)
twitter_userstream = TwitterStream(auth=oauth)
iterator = twitter_userstream.statuses.filter(track="Google", language="en")

# Print a single matching tweet, then stop.
tweet_count = 1
for tweet in iterator:
    tweet_count -= 1
    print(json.dumps(tweet))
    print("\t")
    if tweet_count <= 0:
        break
# (Fragment: `connect` and `interface` are bound above this chunk.)
if connect:
    # Elasticsearch index template: analyze string fields and keep a
    # not_analyzed "raw" sub-field copy of each.
    body = '{"order":0,"template":"*","settings":{},"mappings":{"_default_":{"dynamic_templates":[' \
           '{"string_fields":{"mapping":{"index":"analyzed","type":"string","fields":{"raw":{' \
           '"index":"not_analyzed","type":"string"}}},"match_mapping_type":"string","match":"*"}}]'\
           ',"_all":{"enabled":true}}},"aliases":{}}'
    template = interface.exists_template(ELASTICSEARCH['template'], )
    if template:
        print('Mapping existis, using it.')
    else:
        print('Creating map for use!')
        interface.put_template(name=ELASTICSEARCH['template'], body=body)


if __name__ == '__main__':
    print('TWEPY - Twitter to Elasticsearch Interface with Python')
    stream = TwitterStream(auth=auth())
    tweet_iter = stream.statuses.sample()
    template_es()
    for tweet in tweet_iter:
        # Deletion notices carry no status payload; skip them.
        if 'delete' in tweet.keys():
            pass
        else:
            timestamp = return_datetime(tweet['created_at'])
            hashtags = return_hashtags(tweet)
            use_hashtags = contains_in_list(hashtags)
            user_mentions = return_user_mentions(tweet)
            use_mentions = contains_in_list(user_mentions)
            # (Loop body truncated in this chunk.)
def __init__(self, ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET):
    """Wire up the streaming client and the text-analysis helper."""
    credentials = OAuth(ACCESS_TOKEN, ACCESS_SECRET,
                        CONSUMER_KEY, CONSUMER_SECRET)
    self.twitter_stream = TwitterStream(auth=credentials)
    self.analyser = Analyser(TA_ACCESS_KEY)
def get_data(self, track, count=10):
    """Stream English tweets matching *track* and distill *count* of them."""
    stream = TwitterStream(auth=self.create_auth())
    tweet_iter = stream.statuses.filter(track=track, language="en")
    return self.get_relevant_data(tweet_iter, count)
# Also post images in replies. try: OAUTH = OAuth(os.environ['TW_ACCESS_TOKEN'], os.environ['TW_ACCESS_SECRET'], os.environ['TW_CONSUMER_KEY'], os.environ['TW_CONSUMER_SECRET']) SHORTE_ST_TOKEN = os.environ['SHORTE_ST_TOKEN'] except KeyError: # For local runs. with open('.env', 'r') as secret: exec(secret.read()) OAUTH = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET) ACCOUNT_HANDLER = Twitter(auth=OAUTH) STREAM_HANDLER = TwitterStream(auth=OAUTH) def main(): """Main function to handle different activites of the account.""" streamer = managers.StreamThread( STREAM_HANDLER, ACCOUNT_HANDLER) # For the troubling part. account_manager = managers.AccountThread( ACCOUNT_HANDLER) # For retweets, likes, follows. streamer.start() account_manager.run() # Execute the main() function only if script is executed directly. if __name__ == "__main__":
from twitter import Twitter
from twitter import OAuth
from twitter import TwitterHTTPError
from twitter import TwitterStream

# NOTE(review): real API credentials are hard-coded here; move them to
# environment variables or a config file before publishing this code.
ck = 'CP7fgUIajeNTjx2GWAOw8gJLn'
cs = 'EW8cDRlfKrF3D91n1OdwqZPtWs2AVy3MqFH7Zxm7usx3f9qkJT'
at = '498725176-adTcq6fMyqlzvEINcg8ujCxUT2f4TafNsLJFg2yx'
ats = 'q94CVXaaAmHXuhQqjL4b26Q5Vdl5lx5PJhQT8f4M6nvfm'

oauth = OAuth(at, ats, ck, cs)
twit_api = Twitter(auth=oauth)

# Locations with trend data available.
t_loc = twit_api.trends.available()
t_loc  # bare expression — only meaningful in a REPL/notebook

# Stream English tweets mentioning "Bitcoin" and buffer the first 50.
ts = TwitterStream(auth=oauth)
iterator = ts.statuses.filter(track="Bitcoin", language="en")
b = []
for t in iterator:
    b.append(t)
    if len(b) == 50:
        break
len(b)  # bare expression — REPL-style sanity check

import json
from pandas.io.json import json_normalize

# Flatten the nested tweet dicts into a tabular DataFrame.
df = json_normalize(b)
# when repo is made public, the keys and tokens will be replaced with placeholders auth = OAuth( consumer_key='2CE1E6U7odFK1MFWeCnOPIh5R', consumer_secret='SqqWIvcMGdLbwAqu2oSBzsCr4379aSITLy4AsA9HZyPQxYqCl6', token='796842527487889409-hY298XB4dZGxBLU2blhpCVMz14UPQo8', token_secret='E9CmwGNpDNffxzU7NjuXernjofYSEF6RyjEKiVantXJap') # auth = OAuth( # keys for [email protected] , secondary test account with same login # consumer_key='PfV0xdYWs55kstAO4PHF1kIHt', # consumer_secret='wYtyvj7EaHBWftLCR8sfYBJKQISu4PhhWszIuLACo0I4jqBgAi', # token='792039779068157952-HxKthF9JlcGtDYEiHfT1bn456tJKNLE', # token_secret='Fl24QTmnau3vQB3svxDBnepwTL4ifGHvLJVD52PXKXh99' # ) t = Twitter(auth=auth) twitter_userstream = TwitterStream(auth=auth, domain='userstream.twitter.com') def insert_to_database(tweet_obj): conn = pymysql.connect(host='localhost', user='******', passwd='thisisthepassword', db='thereminderbot') cursor = conn.cursor() reminder_str = "INSERT INTO reminders (SENDER, HOUR, MINUTE, PERIOD, " \ "TIME_ZONE, MONTH, DAY, MSG, FOLLOWING) VALUES ('{0}', {1}," \ " {2}, '{3}', '{4}', {5}, {6}, '{7}', {8});".format(tweet_obj.sender, tweet_obj.hour, tweet_obj.minute, tweet_obj.period, tweet_obj.time_zone, tweet_obj.month, tweet_obj.day, tweet_obj.msg, tweet_obj.following) cursor.execute(reminder_str)
# Variables that contains the user credentials to access Twitter API ACCESS_TOKEN = '611030781-0VuUBdJqtq5KLMKYJ1k6UOqXFXblHhmwuGFNdwVZ' ACCESS_SECRET = 'e1GcCCWKpndCiYvBvzNmgxbCksGb7ktzI1Ne1wdDNZT1n' CONSUMER_KEY = 'xfg7okUYfazB5t31i9CtRCMkq' CONSUMER_SECRET = 'qt2ngmhSHDXvAZZBbJK4xc2X6WYHNTduFUWkcVFoY5q5Gxne33' oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET) auth = tweepy.OAuthHandler( 'xfg7okUYfazB5t31i9CtRCMkq', 'qt2ngmhSHDXvAZZBbJK4xc2X6WYHNTduFUWkcVFoY5q5Gxne33') auth.set_access_token('611030781-0VuUBdJqtq5KLMKYJ1k6UOqXFXblHhmwuGFNdwVZ', 'e1GcCCWKpndCiYvBvzNmgxbCksGb7ktzI1Ne1wdDNZT1n') # Initiate the connection to Twitter Streaming API twitter_stream = TwitterStream(auth=oauth) # Get a sample of the public data following through Twitter iterator = twitter_stream.statuses.sample() # Print each tweet in the stream to the screen # Here we set it to stop after getting 1000 tweets. # You don't have to set it to stop, but can continue running # the Twitter API to collect data for days or even longer. api = tweepy.API(auth) public_tweets = api.home_timeline() txts = [] wr_file = open('ipjson.json', 'a+') tweet_count = 1
def creationtwitter(request,hash1,hash2,hash3):
    # Django view (Python 2): streams tweets mentioning any of three
    # hashtags and animates a live matplotlib bar chart ranking them.
    # NOTE(review): plt.show() blocks the request thread and opens a GUI
    # window on the server — confirm this is only ever run locally.
    h1 = hash1
    h2 = hash2
    h3 = hash3
    try:
        import json
    except ImportError:
        import simplejson as json
    print h1,h2,h3
    # Palette constant; not used in this chunk.
    color = ['#2ecc71','#2980b9','#c0392b','#f1c40f','#2c3e50']
    # NOTE(review): live API credentials are hard-coded below — move them
    # out of source control.
    ckey = 'fibkxu7Ki2PjXQM13EOpqNoB8'
    csecret = 'aLs6U02RHTl3Hx1XOyF20SuYAfUpAKGJvEWpca1s8JWqqEw7Wg'
    atoken = '744960984230440960-va4hCQfFm43kPT3kbb7BVwX7Xtnj7Wa'
    asecret = 'pCRHlvHBLsTgPymvtzVNiXX6sl44dBPTEmBHpfMFycd63'
    oauth = OAuth(atoken , asecret, ckey, csecret)
    twitter_stream = TwitterStream(auth=oauth)
    filterString = h1+","+h2+","+h3
    iterator = twitter_stream.statuses.filter(track=filterString)
    # Single-element list so the nested closures below can mutate the count.
    total_tweets = []
    total_tweets.append(1)
    t=[]
    cnt_of_h=[0,0,0]  # per-hashtag match counters, index-aligned with h
    #width = 0.8
    fig,ax = plt.subplots()
    h = [h1,h2,h3]
    x_pos = list(range(len(h)))
    def word_in_text(word,text):
        # Case-insensitive regex search of word inside text.
        # NOTE(review): word is user input and is not re.escape()d, so
        # regex metacharacters in a hashtag change the match semantics.
        print "in word_in_text"
        word = word.lower()
        text = text.lower()
        match = re.search(word,text)
        if match:
            return True
        return False
    def add_data():
        # Pull the next 5 tweets off the stream and update the counters.
        print "in add_data"
        cnt = 0
        if total_tweets[0] > 60:
            # NOTE(review): returning a redirect from inside an animation
            # callback has no effect on the HTTP response — presumably
            # intended to stop after 60 tweets; verify.
            return redirect("index")
        for tweet in iterator:
            try:
                x = json.dumps(tweet)
                j = json.loads(x)
                tweet_text = j['text']
                if word_in_text(h1,tweet_text):
                    cnt_of_h[0] += 1
                if word_in_text(h2,tweet_text):
                    cnt_of_h[1] += 1
                if word_in_text(h3,tweet_text):
                    cnt_of_h[2] += 1
            except:
                # NOTE(review): bare except silently skips keep-alive and
                # malformed messages alike — narrow to (KeyError, TypeError).
                continue
            cnt += 1
            if cnt == 5:
                break
        total_tweets[0] += 5
        # total_tweets += 5
        # print total_tweets
    def animate(i):
        # FuncAnimation callback: fetch more data, then redraw the bars.
        print " in animate"
        width = 0.8
        add_data()
        ax.clear()
        ax.set_ylabel('Number of tweets')
        stitle = "Ranking : "+h1+" vs. " + h2 + " vs. " + h3
        ax.set_title(stitle)
        ax.set_xticks([p+ 0.2*width for p in x_pos])
        ax.set_xticklabels(h)
        plt.bar(x_pos, cnt_of_h, width, color='g')
    ani = animation.FuncAnimation(fig,animate,interval=10)
    plt.show()  # blocks until the chart window is closed
    return redirect('index')
def getSparkSessionInstance(sparkConf):
    """Return the process-wide singleton SparkSession, creating it lazily."""
    if 'sparkSessionSingletonInstance' not in globals():
        globals()['sparkSessionSingletonInstance'] = (
            SparkSession.builder
            .config(conf=sparkConf)
            .enableHiveSupport()
            .getOrCreate())
    return globals()['sparkSessionSingletonInstance']


def consumer():
    """Consume the `twitter` Kafka topic in 10-second micro-batches and run
    both analyses (top hashtags, top keywords) on every batch."""
    #context = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext)
    context = StreamingContext(spark_context, 10)
    dStream = KafkaUtils.createDirectStream(
        context, ["twitter"], {"metadata.broker.list": "localhost:9092"})
    # Question 1: top-5 hashtags per batch.
    dStream.foreachRDD(p1)
    # Question 2: top-5 keywords per batch.
    dStream.foreachRDD(p2)
    context.start()
    context.awaitTermination()


# Words ignored by the keyword count in p2.  A frozenset membership test
# replaces the previous 24-clause `x != "a" and x != "and" ...` chain:
# same words filtered, O(1) per lookup, and trivially extensible.
_STOP_WORDS = frozenset([
    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "has",
    "he", "in", "is", "it", "its", "of", "on", "that", "the", "to", "was",
    "were", "will", "with",
])


def p1(batch_time, rdd):
    """Append the 5 most frequent hashtags of this batch to `hashtag_table`.

    Args:
        batch_time: timestamp of the micro-batch (supplied by foreachRDD;
            renamed from `time` to avoid shadowing the stdlib module).
        rdd: RDD of (kafka_key, json_string) pairs.
    """
    # Drop the Kafka key (field [0]) and parse each JSON payload.
    rdd = rdd.map(lambda x: json.loads(x[1]))
    records = rdd.collect()
    # Keep only hashtag entities; drop tweets without any.
    records = [element["entities"]["hashtags"]
               for element in records if "entities" in element]
    records = [x for x in records if x]
    # Use the text of the first hashtag of each tweet.
    records = [element[0]["text"] for element in records]
    if not records:
        print("Empty List")
        return
    rdd = spark_context.parallelize(records)
    spark = getSparkSessionInstance(rdd.context.getConf())
    # Convert RDD[String] to RDD[Row] to DataFrame, tagged with the batch time.
    hashtagsDataFrame = spark.createDataFrame(
        rdd.map(lambda x: Row(hashtag=x, time_stamp=batch_time)))
    hashtagsDataFrame.createOrReplaceTempView("hashtags")
    hashtagsDataFrame = spark.sql(
        "select hashtag, count(*) as total, time_stamp from hashtags "
        "group by hashtag, time_stamp order by total desc limit 5")
    hashtagsDataFrame.write.mode("append").saveAsTable("hashtag_table")
    print(batch_time)


def p2(batch_time, rdd):
    """Append the 5 most frequent non-stop-words of this batch to `keywords_table`."""
    rdd = rdd.map(lambda x: json.loads(x[1]))
    records = rdd.collect()
    records = [element["text"] for element in records if "text" in element]
    if not records:
        print("Empty List")
        return
    rdd = spark_context.parallelize(records)
    spark = getSparkSessionInstance(rdd.context.getConf())
    # Tokenise, lowercase, and drop stop words.
    rdd = rdd.map(lambda x: x.split()).flatMap(lambda x: x).map(lambda x: x.lower())
    rdd = rdd.filter(lambda x: x not in _STOP_WORDS)
    keywordDataFrame = spark.createDataFrame(
        rdd.map(lambda x: Row(keyword=x, time_stamp=batch_time)))
    keywordDataFrame.createOrReplaceTempView("keywords")
    keywordDataFrame = spark.sql(
        "select keyword, count(*) as total, time_stamp from keywords "
        "group by keyword, time_stamp order by total desc limit 5")
    keywordDataFrame.write.mode("append").saveAsTable("keywords_table")


if __name__ == "__main__":
    print("Starting to read tweets")  # typo fix: was "Stating"
    credentials = read_credentials()
    oauth = OAuth(credentials['ACCESS_TOKEN'], credentials['ACCESS_SECRET'],
                  credentials['CONSUMER_KEY'], credentials['CONSUMER_SECRET'])
    twitter_stream = TwitterStream(auth=oauth)
    spark_context = SparkContext(appName="First Group Consumer")
    checkpointDirectory = "/checkpoint"
    consumer()
import os
import time  # NOTE(review): not used in this chunk — confirm before removing
import re

from twitter import oauth_dance, read_token_file, TwitterStream, OAuth, Twitter
from local_info import API_key, API_secret

# get accessToken and accessSecret
# Run the interactive OAuth dance once and cache the token on disk; later
# runs reuse the cached credentials file.
MY_TWITTER_CREDS = os.path.expanduser(r'.my_app_credentials')
if not os.path.exists(MY_TWITTER_CREDS):
    oauth_dance("100m_tweet_crawler", API_key, API_secret, MY_TWITTER_CREDS)
oauth_token, oauth_secret = read_token_file(MY_TWITTER_CREDS)

twitter = Twitter(auth=OAuth(oauth_token, oauth_secret, API_key, API_secret))
stream = TwitterStream(auth=OAuth(oauth_token, oauth_secret, API_key, API_secret),
                       secure=True)

# NG words: tweets containing any of these substrings are rejected.
check_chara = ('http', '#', '\\', '【', '】')

# regex: hashtags (ASCII or Japanese characters) and bare-URL-only tweets.
hashtag_pattern = r"[##]([\w一-龠ぁ-んァ-ヴーa-z]+)"
url_pattern = r"^(https?|ftp)://[A-Za-z0-9.-]*$"
r = re.compile(url_pattern)


def trim(text):
    """ replace newline characters with other characters"""
    return text.replace('\r', ' ').replace('\n', ' ')
import json
from config import CONSUMER_KEY, CONSUMER_SECRET, TOKEN, TOKEN_SECRET
from twitter import OAuth, TwitterStream

# Election-related terms followed on the streaming API.
_TRACK = "EleccionesArgentina,YaVoté,YaVote,Legislativas,Diputados,Senadores"

_credentials = OAuth(consumer_key=CONSUMER_KEY,
                     consumer_secret=CONSUMER_SECRET,
                     token=TOKEN,
                     token_secret=TOKEN_SECRET)
stream = TwitterStream(auth=_credentials)

# Append each matching status to tweets.txt, one JSON object per line.
with open("tweets.txt", "a") as outfile:
    for status in stream.statuses.filter(track=_TRACK):
        print(json.dumps(status), file=outfile)
def get_data():
    """Continuously collect 1000 geo-tagged NYC tweets that carry hashtags,
    then publish aggregate statistics (top hashtags, keywords, mentioners
    and places) as JSON files under static/data/.

    NOTE(review): the original formatting was lost; the `while True:` is
    assumed to wrap one full collect-and-aggregate pass — confirm against
    version history.
    """
    while True:
        # Import the necessary package to process data in JSON format.
        try:
            import json
        except ImportError:
            import simplejson as json
        # Import the necessary methods from "twitter" library.
        from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
        import common_word  # project-local list of words to ignore

        # NOTE(review): live credentials are hard-coded; move them to
        # environment variables or configuration.
        ACCESS_TOKEN = '703706843978330112-Vtx3ZBhoay3AoYGky1lCzy9bBMQWDRC'
        ACCESS_SECRET = 'g8dHFdnKpi4xXqmyVGlQLPRnnAqVGtIRmEkwRS2hBzV5S'
        CONSUMER_KEY = '96Q7FV1SgqFHObyGRdq88RUZs'
        CONSUMER_SECRET = 'HDZmc1hVFQI6bG9pxdR47zXaKlz0JDDyGfzVa2L5RNpFFKhAF9'
        oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)

        # Stream statuses inside a bounding box around New York City.
        twitter_stream = TwitterStream(auth=oauth)
        iterator = twitter_stream.statuses.filter(locations='-74,40,-73,41')

        # Stop after collecting MAX usable tweets.
        MAX = 1000
        tweet_count = MAX
        raw_data_set = []
        for tweet in iterator:
            # Round-trip through JSON to normalise the response to a plain dict.
            data = json.loads(json.dumps(tweet))
            # Skip tweets with no hashtags, and guard against a missing
            # place (the old code raised TypeError when place was null).
            if not data.get("entities", {}).get("hashtags"):
                continue
            if not data.get("place"):
                continue
            raw_data_set.append({
                "hashtags": data["entities"]["hashtags"],
                "text": data["text"],
                "screen_name": data["user"]["screen_name"],
                "followers_count": data["user"]["followers_count"],
                "created_at": data["created_at"],
                "coordinates": data["place"]["bounding_box"]["coordinates"],
                "place": data["place"]["full_name"]
            })
            tweet_count -= 1
            print(tweet_count)
            if tweet_count <= 0:
                # Persist the raw collection, then break to the analysis pass.
                with open("static/data/twitter_data.json", "w") as output_file:
                    output_file.write(json.dumps(raw_data_set))
                break

        def find_index(items, key, value):
            # Index of the first dict whose item[key] == value, or None.
            # Replaces the near-duplicate in_result / in_keyword_result.
            for idx, item in enumerate(items):
                if item[key] == value:
                    return idx
            return None

        with open("static/data/twitter_data.json") as input_file:
            data = json.loads(input_file.read())

        result = []            # [{"hashtag", "hashtag_num"}]
        all_words = []
        keyword_result = []    # [{"keyword", "keyword_num"}]
        mentions_result = []   # [{"mentioners", "mentioners_num", "text"}]
        mentioners = []
        place_list = []
        treemap_result = []    # [{"name", "size"}]

        for tweet in data:
            # Hashtag frequencies.
            for hashtag in tweet["hashtags"]:
                idx = find_index(result, "hashtag", hashtag["text"])
                if idx is not None:
                    result[idx]["hashtag_num"] += 1
                else:
                    result.append({"hashtag": hashtag["text"],
                                   "hashtag_num": 1})
            all_words += tweet["text"].split()
            # One entry per distinct author, weighted by follower count.
            if tweet["screen_name"] not in mentioners:
                mentions_result.append({
                    "mentioners": tweet["screen_name"],
                    "mentioners_num": tweet["followers_count"],
                    "text": tweet["text"]
                })
                mentioners.append(tweet["screen_name"])
            # Tweet counts per place, for the treemap.
            if tweet["place"] not in place_list:
                place_list.append(tweet["place"])
                treemap_result.append({"name": tweet["place"], "size": 1})
            else:
                for entry in treemap_result:
                    if entry["name"] == tweet["place"]:
                        entry["size"] += 1
                        break

        # Keyword frequencies, ignoring common words.
        for word in all_words:
            if word in common_word.common_word:
                continue
            idx = find_index(keyword_result, "keyword", word)
            if idx is not None:
                keyword_result[idx]["keyword_num"] += 1
            else:
                keyword_result.append({"keyword": word, "keyword_num": 1})

        # Stable descending sorts.  list.sort replaces four hand-rolled
        # O(n^2) bubble sorts; both are stable, so the resulting order is
        # identical.
        result.sort(key=lambda e: e["hashtag_num"], reverse=True)
        keyword_result.sort(key=lambda e: e["keyword_num"], reverse=True)
        mentions_result.sort(key=lambda e: e["mentioners_num"], reverse=True)
        treemap_result.sort(key=lambda e: e["size"], reverse=True)

        # Keep only short labels (they fit the charts) and cap list sizes.
        # Unlike the old while-loops, these comprehensions cannot run off
        # the end of the list (the old code raised IndexError when fewer
        # than 18 short entries existed).
        filtered_result = [e for e in result
                           if len(e["hashtag"]) <= 13][:18]
        filtered_keyword_result = [e for e in keyword_result
                                   if len(e["keyword"]) <= 13][:18]
        filtered_mentions_result = [e for e in mentions_result
                                    if len(e["mentioners"]) <= 13][:35]

        outputs = [
            ("static/data/hash_tag.json", filtered_result),
            ("static/data/key_word.json", filtered_keyword_result),
            ("static/data/mentions_tweets.json", filtered_mentions_result),
            ("static/data/treemap.json", treemap_result[:30]),
        ]
        for path, payload in outputs:
            with open(path, "w") as output_file:
                output_file.write(json.dumps(payload))