def on_data(self, data):
    """Tweepy stream callback: log every tweet and append geo-tagged ones
    to the current GeoJSON output file.

    Updates the module-level counters (all tweets / located tweets, total
    and per interval).  Always returns True so the stream keeps running.
    """
    global countLoc, countAll, countAll_intervall, countLoc_intervall, \
        outputgeo, nowDateTime, currentKeyDateTime
    # Refresh the "now" key; it is compared against currentKeyDateTime
    # below to decide how to terminate the appended feature line.
    nowDateTime = getCurrentDateKey()
    try:
        tweet = json.loads(data)
        print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
            tweet['user']['screen_name'], tweet['text'],
            tweet['place'], tweet['coordinates']))
        countAll += 1
        countAll_intervall += 1
        # Convert to GeoJSON; format2geoJSON returns None when the tweet
        # carries no usable geo information.
        geoJson = format2geoJSON(tweet)
        if geoJson is not None:
            with open(outputgeo, 'a+') as outPgeo:
                json.dump(geoJson, outPgeo)
                # Comma-separate features while still inside the current
                # time bucket; the with-block closes the file (the old
                # explicit close() was redundant).
                if nowDateTime == currentKeyDateTime:
                    outPgeo.write(',\n')
                else:
                    outPgeo.write('\n')
            countLoc += 1
            countLoc_intervall += 1
        if countAll % 100 == 0:
            print("Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc))
    except Exception as e:
        # Was a bare "except: pass", which also swallowed SystemExit and
        # KeyboardInterrupt; narrow it and at least report the problem.
        print("on_data failed: %s" % e)
    return True
def handle_tweet(self, data):
    """Parse one raw tweet, log it, and append its GeoJSON feature to the
    instance's output file.

    Updates the instance-level counters (all tweets / located tweets,
    total and per interval).
    """
    tweet = json.loads(data)
    print("\n\n")
    print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
        tweet['user']['screen_name'], tweet['text'],
        tweet['place'], tweet['coordinates']))
    self.countAll += 1
    self.countAll_intervall += 1
    # format2geoJSON returns None when the tweet has no geo information.
    geoJson = format2geoJSON(tweet)
    if geoJson is not None:
        with open(self.outputgeo, 'a+') as outPgeo:
            json.dump(geoJson, outPgeo)
            # Comma-separate features; the with-block closes the file
            # (the old explicit close() was redundant).
            outPgeo.write(',')
        self.countLoc += 1
        self.countLoc_intervall += 1
def on_data(self, data):
    """Tweepy stream callback: log each tweet, optionally archive it as
    text, and append geo-tagged tweets to the GeoJSON output file.

    Terminates the process via exit(0) once countLoc reaches CUTOFF.
    Returns True so the stream keeps running after recoverable errors.
    """
    global countLoc, countAll
    try:
        tweet = json.loads(data)
        if countLoc == CUTOFF:
            exit(0)
        print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
            tweet['user']['screen_name'], tweet['text'],
            tweet['place'], tweet['coordinates']))
        countAll += 1
        # Optionally archive the raw tweet as plain text.
        if WRITE2TXT:
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
        # Convert to GeoJSON; None means the tweet has no usable geo info.
        geoJson = format2geoJSON(tweet)
        if geoJson is not None:
            # TODO write in Redis Proxy instance
            with open(outputgeo, 'a+') as outPgeo:
                json.dump(geoJson, outPgeo)
                outPgeo.write(',\n')
            countLoc += 1
        # Periodic progress notification.
        if countAll % 100 == 0:
            print("Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc))
    except Exception as e:
        # BUG FIX: the bare "except:" also caught the SystemExit raised by
        # exit(0), so the CUTOFF shutdown silently never happened.  Catch
        # only Exception so SystemExit/KeyboardInterrupt propagate.
        print("on_data failed: %s" % e)
    return True
def handle_tweet(self, data):
    """Decode one raw tweet, log it, and persist its GeoJSON feature.

    Bumps the instance counters for all tweets and, when geo information
    is present, for located tweets (total and per interval).
    """
    parsed = json.loads(data)
    print("\n\n")
    summary = '@%s tweeted: %s\nPlace: %s (%s)\n' % (
        parsed['user']['screen_name'],
        parsed['text'],
        parsed['place'],
        parsed['coordinates'],
    )
    print(summary)
    self.countAll += 1
    self.countAll_intervall += 1
    # format2geoJSON yields None for tweets without geo information.
    feature = format2geoJSON(parsed)
    if feature is None:
        return
    geo_file = open(self.outputgeo, 'a+')
    json.dump(feature, geo_file)
    geo_file.write(',')
    geo_file.close()
    self.countLoc += 1
    self.countLoc_intervall += 1
def on_data(self, data):
    """Tweepy stream callback: log each tweet, optionally write it to a
    text archive, and append geo-tagged tweets to the GeoJSON file.

    Stops the process via exit(0) once countLoc reaches CUTOFF.  Returns
    True so the stream continues after recoverable errors.
    """
    global countLoc, countAll
    try:
        tweet = json.loads(data)
        if countLoc == CUTOFF:
            exit(0)
        print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
            tweet['user']['screen_name'], tweet['text'],
            tweet['place'], tweet['coordinates']))
        countAll += 1
        # Write the raw tweet to the .txt archive when enabled.
        if WRITE2TXT:
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
        # Convert to GeoJSON (None when there is no geo information).
        geoJson = format2geoJSON(tweet)
        if geoJson is not None:
            # TODO write in Redis Proxy instance
            with open(outputgeo, 'a+') as outPgeo:
                json.dump(geoJson, outPgeo)
                outPgeo.write(',\n')
            countLoc += 1
        # Progress notification every 100 tweets.
        if countAll % 100 == 0:
            print("Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc))
    except Exception as e:
        # BUG FIX: the bare "except:" swallowed the SystemExit from
        # exit(0), defeating the CUTOFF shutdown.  Catch only Exception
        # so SystemExit/KeyboardInterrupt can propagate.
        print("on_data failed: %s" % e)
    return True
def on_data(self, data):
    """Tweepy stream callback with hourly output-file rotation.

    While the hour key is unchanged, tweets are appended to the current
    GeoJSON file.  When the hour rolls over: the old file's features
    array is terminated, the file is published to Redis for one week
    together with interval statistics, optionally deleted, and a fresh
    file is started before the triggering tweet is handled.

    Always returns True so the stream keeps running.
    """
    global countLoc, countAll, countAll_intervall, countLoc_intervall, \
        outputgeo, nowDateTime, currentKeyDateTime
    try:
        if str(nowDateTime) == str(currentKeyDateTime):
            # Changes every hour, so that we publish hourly.
            tweet = json.loads(data)
            print("\n\n")
            print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
                tweet['user']['screen_name'], tweet['text'],
                tweet['place'], tweet['coordinates']))
            countAll += 1
            countAll_intervall += 1
            # format2geoJSON returns None if no geo info is provided.
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    outPgeo.write(',')
                countLoc += 1
                countLoc_intervall += 1
            # Update time
            nowDateTime = getCurrentDateKey()
        else:
            print("WRITING TO NEW FILE!")
            if os.path.isfile(outputgeo):
                print("CHECK: is already a file")
                # Terminate the features array of the finished file.
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write(']}')
                # Publish the finished file for one week; keep it as a
                # text blob for easy reading in the app.
                with open(outputgeo, 'r') as uploadFile:
                    raw = uploadFile.read()
                # The appender leaves a trailing comma before the closing
                # "]}"; repair it so the blob is valid JSON.
                if raw.endswith(',]}'):
                    raw = raw[:-3] + ']}'
                # BUG FIX: the original called json.loads(uploadFile) on
                # the file object (TypeError) and stored the parsed dict
                # in Redis; validate the text and store the JSON string.
                json.loads(raw)  # raises ValueError if the blob is corrupt
                # NOTE(review): keeps the original (name, value, time)
                # argument order of legacy redis-py Redis.setex; verify
                # against the REDIS client (StrictRedis wants name, time,
                # value).
                REDIS.setex(outputgeo, raw, 60 * 60 * 24 * 7)  # a week in seconds
                # stats_ARRAY_HOUR_DATE -> interval/total tweet counters.
                stats_key = "stats_{0}_{1}_{2}".format(
                    searchArray,
                    currentKeyDateTime.split(':')[0],
                    currentKeyDateTime.split(':')[1])
                # Serialize the stats dict; redis-py rejects dict values.
                REDIS.set(stats_key, json.dumps({
                    "All_Tweets_seen": countAll,
                    "Location_Tweets_seen": countLoc,
                    "All_Tweets_Intervall": countAll_intervall,
                    "Location_Tweets_Intervall": countLoc_intervall}))
                countAll_intervall = 0
                countLoc_intervall = 0
                # Delete old file?
                if DELETE_OLD:
                    os.remove(outputgeo)
            print("CHECKPOINT 1")
            # Roll both time keys forward to the new hour.
            currentKeyDateTime = getCurrentDateKey()
            nowDateTime = getCurrentDateKey()
            # Set the new filename for the new hour.
            outputgeo = outputgeo_tpl % (
                searchArray,
                currentKeyDateTime.split(':')[0],
                currentKeyDateTime.split(':')[1])
            print("FILENAME: %s\n" % (outputgeo))
            # Write the opening line of the new FeatureCollection.
            with open(outputgeo, 'a+') as outPgeo:
                outPgeo.write('{"type":"FeatureCollection","features":[')
            # Handle the tweet that triggered the rotation.
            tweet = json.loads(data)
            print("\n\n")
            print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
                tweet['user']['screen_name'], tweet['text'],
                tweet['place'], tweet['coordinates']))
            countAll += 1
            countAll_intervall += 1
            # format2geoJSON returns None if no geo info is provided.
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    outPgeo.write(',')
                countLoc += 1
                countLoc_intervall += 1
        # Progress notification every 100 tweets.
        if countAll % 100 == 0:
            print("Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc))
    except Exception as e:
        # BUG FIX: the bare "except: pass" hid every failure, including
        # the broken Redis publish above; report it but keep streaming.
        print("on_data failed: %s" % e)
    return True
def searchTweets(keywordLists=None, keywords=None, language=None,
                 geo_lat=None, geo_lng=None, geo_rad=None,
                 timeStart=None, timeStop=None, no_entities=False,
                 no_retweets=False, no_links=False, no_answers=False):
    """Run a Twitter search with the given filters and archive the results.

    Each tweet is appended as text to `outputfile`; its GeoJSON
    conversion (when available) is appended to `outputgeo`.

    Returns the number of tweets seen.
    """
    tweetsCount = 0
    tso = TwitterSearchOrder()
    # Remove all restrictions from previous calls.
    tso.remove_all_filters()
    # This makes sure no videos/pics are commented.
    # TODO: append more synonyms and other languages.
    tso.set_keywords(["-video", "-pic", "-foto", "-funny", "-clip",
                      "-vid", "-movie", "-song"])
    try:
        # BUG FIX: the original re-created TwitterSearchOrder here, which
        # silently discarded the exclusion keywords configured above.
        if keywordLists is not None:
            for keywordList in keywordLists:
                tso.add_keyword(keywordList, or_operator=True)
        if keywords is not None:
            for keyword in keywords:
                tso.add_keyword(keyword, or_operator=True)
        if language is not None:
            tso.set_language(str(language))
        if geo_rad is not None and geo_lat is not None and geo_lng is not None:
            # Must be of format: str(lat,lng,radius) + 'km'/'mi'.
            tso.set_geocode(geo_lat, geo_lng, geo_rad, imperial_metric=True)
        if timeStart is not None:
            # Time has to be of the format: YYYY-MM-DD.
            tso.add_keyword('since:' + str(timeStart))
        if timeStop is not None:
            # Time has to be of the format: YYYY-MM-DD.
            tso.add_keyword('until:' + str(timeStop))
        if no_entities:
            tso.set_include_entities(False)
        if no_retweets:
            pass  # tso.set_include_rts(False)  TODO
        if no_links:
            pass  # TODO
        if no_answers:
            pass  # tso.set_exclude_replies(True)  TODO
        # Maybe use sentiment analysis?
        # SYNTAX FIX: this disabled call used a C-style "//" comment.
        # tso.set_negative_attitude_filter()
        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)
        for tweet in ts.search_tweets_iterable(tso, callback=my_callback):
            tweetsCount += 1
            # Archive the raw tweet as text.
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
            # BUG FIX: format2geoJSON can return None (no geo info); the
            # original wrote it unconditionally, raising TypeError.
            # NOTE(review): write() requires a str -- confirm that
            # format2geoJSON returns a string here, not a dict.
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write(geoJson)
            print('@%s tweeted: %s\n' % (tweet['user']['screen_name'],
                                         tweet['text']))
    except TwitterSearchException as e:
        print(e)
    except requests.exceptions.SSLError as e:
        print(e)
    return tweetsCount
def searchTweets(keywordLists=None, keywords=None, language=None,
                 geo_lat=None, geo_lng=None, geo_rad=None,
                 timeStart=None, timeStop=None, no_entities=False,
                 no_retweets=False, no_links=False, no_answers=False):
    """Search Twitter with the given filters and archive every result.

    Tweets are appended as text to `outputfile`; their GeoJSON
    conversions (when available) are appended to `outputgeo`.

    Returns the number of tweets seen.
    """
    tweetsCount = 0
    tso = TwitterSearchOrder()
    # Remove all restrictions from previous calls.
    tso.remove_all_filters()
    # This makes sure no videos/pics are commented.
    # TODO: append more synonyms and other languages.
    tso.set_keywords(["-video", "-pic", "-foto", "-funny", "-clip",
                      "-vid", "-movie", "-song"])
    try:
        # BUG FIX: a second TwitterSearchOrder() was created here, which
        # threw away the media-exclusion keywords configured above.
        if keywordLists is not None:
            for keywordList in keywordLists:
                tso.add_keyword(keywordList, or_operator=True)
        if keywords is not None:
            for keyword in keywords:
                tso.add_keyword(keyword, or_operator=True)
        if language is not None:
            tso.set_language(str(language))
        if geo_rad is not None and geo_lat is not None and geo_lng is not None:
            # Must be of format: str(lat,lng,radius) + 'km'/'mi'.
            tso.set_geocode(geo_lat, geo_lng, geo_rad, imperial_metric=True)
        if timeStart is not None:
            # Time has to be of the format: YYYY-MM-DD.
            tso.add_keyword('since:' + str(timeStart))
        if timeStop is not None:
            # Time has to be of the format: YYYY-MM-DD.
            tso.add_keyword('until:' + str(timeStop))
        if no_entities:
            tso.set_include_entities(False)
        if no_retweets:
            pass  # tso.set_include_rts(False)  TODO
        if no_links:
            pass  # TODO
        if no_answers:
            pass  # tso.set_exclude_replies(True)  TODO
        # Maybe use sentiment analysis?
        # SYNTAX FIX: this disabled call used a C-style "//" comment.
        # tso.set_negative_attitude_filter()
        ts = TwitterSearch(consumer_key=consumer_key,
                           consumer_secret=consumer_secret,
                           access_token=access_token,
                           access_token_secret=access_token_secret)
        for tweet in ts.search_tweets_iterable(tso, callback=my_callback):
            tweetsCount += 1
            # Archive the raw tweet as text.
            with open(outputfile, 'a+') as outP:
                outP.write(str(tweet))
                outP.write('\n')
            # BUG FIX: format2geoJSON can return None (no geo info); the
            # original wrote it unconditionally, raising TypeError.
            # NOTE(review): write() requires a str -- confirm that
            # format2geoJSON returns a string here, not a dict.
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write(geoJson)
            print('@%s tweeted: %s\n' % (tweet['user']['screen_name'],
                                         tweet['text']))
    except TwitterSearchException as e:
        print(e)
    except requests.exceptions.SSLError as e:
        print(e)
    return tweetsCount
def on_data(self, data):
    """Tweepy stream callback with hourly output-file rotation.

    While the hour key is unchanged, tweets are appended to the current
    GeoJSON file.  On rollover the finished file is closed (features
    array terminated), published to Redis for one week along with
    interval statistics, optionally deleted, and a new file is started
    before the triggering tweet is handled.

    Always returns True so the stream keeps running.
    """
    global countLoc, countAll, countAll_intervall, countLoc_intervall, \
        outputgeo, nowDateTime, currentKeyDateTime
    try:
        if str(nowDateTime) == str(currentKeyDateTime):
            # Changes every hour, so that we publish hourly.
            tweet = json.loads(data)
            print("\n\n")
            print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
                tweet['user']['screen_name'], tweet['text'],
                tweet['place'], tweet['coordinates']))
            countAll += 1
            countAll_intervall += 1
            # format2geoJSON returns None if no geo info is provided.
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    outPgeo.write(',')
                countLoc += 1
                countLoc_intervall += 1
            # Update time
            nowDateTime = getCurrentDateKey()
        else:
            print("WRITING TO NEW FILE!")
            if os.path.isfile(outputgeo):
                print("CHECK: is already a file")
                # Terminate the features array of the finished file.
                with open(outputgeo, 'a+') as outPgeo:
                    outPgeo.write(']}')
                # Publish the finished file for one week; keep it as a
                # text blob for easy reading in the app.
                with open(outputgeo, 'r') as uploadFile:
                    raw = uploadFile.read()
                # The appender leaves a trailing comma before the closing
                # "]}"; repair it so the blob is valid JSON.
                if raw.endswith(',]}'):
                    raw = raw[:-3] + ']}'
                # BUG FIX: the original passed the file object itself to
                # json.loads (TypeError) and stored the parsed dict in
                # Redis; validate the text and store the JSON string.
                json.loads(raw)  # raises ValueError if the blob is corrupt
                # NOTE(review): keeps the original (name, value, time)
                # argument order of legacy redis-py Redis.setex; verify
                # against the REDIS client (StrictRedis wants name, time,
                # value).
                REDIS.setex(outputgeo, raw, 60 * 60 * 24 * 7)  # a week in seconds
                # stats_ARRAY_HOUR_DATE -> interval/total tweet counters.
                stats_key = "stats_{0}_{1}_{2}".format(
                    searchArray,
                    currentKeyDateTime.split(':')[0],
                    currentKeyDateTime.split(':')[1])
                # Serialize the stats dict; redis-py rejects dict values.
                REDIS.set(stats_key, json.dumps({
                    "All_Tweets_seen": countAll,
                    "Location_Tweets_seen": countLoc,
                    "All_Tweets_Intervall": countAll_intervall,
                    "Location_Tweets_Intervall": countLoc_intervall}))
                countAll_intervall = 0
                countLoc_intervall = 0
                # Delete old file?
                if DELETE_OLD:
                    os.remove(outputgeo)
            print("CHECKPOINT 1")
            # Roll both time keys forward to the new hour.
            currentKeyDateTime = getCurrentDateKey()
            nowDateTime = getCurrentDateKey()
            # Set the new filename for the new hour.
            outputgeo = outputgeo_tpl % (
                searchArray,
                currentKeyDateTime.split(':')[0],
                currentKeyDateTime.split(':')[1])
            print("FILENAME: %s\n" % (outputgeo))
            # Write the opening line of the new FeatureCollection.
            with open(outputgeo, 'a+') as outPgeo:
                outPgeo.write('{"type":"FeatureCollection","features":[')
            # Handle the tweet that triggered the rotation.
            tweet = json.loads(data)
            print("\n\n")
            print('@%s tweeted: %s\nPlace: %s (%s)\n' % (
                tweet['user']['screen_name'], tweet['text'],
                tweet['place'], tweet['coordinates']))
            countAll += 1
            countAll_intervall += 1
            # format2geoJSON returns None if no geo info is provided.
            geoJson = format2geoJSON(tweet)
            if geoJson is not None:
                with open(outputgeo, 'a+') as outPgeo:
                    json.dump(geoJson, outPgeo)
                    outPgeo.write(',')
                countLoc += 1
                countLoc_intervall += 1
        # Print Notification
        if countAll % 100 == 0:
            print("Saw {0} tweets; {1} of them had location information!\n".format(countAll, countLoc))
    except Exception as e:
        # BUG FIX: the bare "except: pass" hid every failure, including
        # the broken Redis publish above; report it but keep streaming.
        print("on_data failed: %s" % e)
    return True