def stream_crawl(keyword):
    """Start a Twitter stream filtered on *keyword*, retrying until it connects.

    Builds a fresh listener/stream on every attempt; on any streaming error the
    stream is disconnected and the attempt is repeated after a short pause.

    Parameters
    ----------
    keyword : str
        The track term passed to the Twitter filter stream.
    """
    while True:
        listener = TwitterStream(api)
        stream = tweepy.Stream(auth, listener)
        print("Twitter streaming started... ")
        try:
            # BUGFIX: the original hardcoded track=['python'] and ignored the
            # keyword parameter (the commented-out call showed the intent).
            stream.filter(track=[keyword])
            break  # clean exit once the stream finishes without error
        except Exception:  # narrow from bare except so SystemExit/KeyboardInterrupt propagate
            # Message aligned with the actual sleep duration (original said 60
            # sec but slept 5).
            print("Error!...And Retry after 5 sec")
            stream.disconnect()
            sleep(5)
from TwitterStream import TwitterStream

# Driver code for the project: opens a stream tracking a single keyword.
# Parenthesized print for consistency with the rest of the codebase (and it
# behaves identically for a single string under Python 2 and 3).
print("Main")
twitter = TwitterStream()
twitter.get_stream("Ivanka")
proxy = os.environ["http_proxy"] except KeyError: print "Not using a proxy. If you want to use a proxy, you need to do something like this" print "export http_proxy=http://www-cache.your.site.com:3128/" proxy = None print username, password, proxy pids = ["b00001", "b00002", "b00003"] # keywords = [ "Sarah Jane", "CBBC"] #trending topics earlier trends_url = "http://api.twitter.com/1/trends/current.json" raw_trending = get_url(trends_url) trending_cooked = cjson.decode(raw_trending) print "Trending Topics", trending_cooked trending_topics = [X["query"] for X in trending_cooked["trends"].values()[0]] keywords = trending_topics # request = [ pids, keywords ] request = [keywords, pids] # docstring wrong, should be this way round Pipeline( DataSource([request]), TwitterStream(username=username, password=password, proxy=proxy), PureTransformer(lambda x: repr(x) + "\n"), ConsoleEchoer(), ).run()
from TwitterStream import TwitterStream

# Twitter API credentials — replace the placeholders with real values before
# running this script.
api_key = "Put the api key here"
api_secret = "Put the api secret here"
access_token_key = "Put the token key here"
access_token_secret = "Put the token secret here"

# Filter-stream endpoint: bounding box roughly covering the continental US,
# restricted to English tweets.
# BUGFIX: the original URL contained literal spaces around '&'
# (" & language=en"), which produces a malformed query string that the API
# would reject or misparse.
url = (
    "https://stream.twitter.com/1.1/statuses/filter.json"
    "?locations=-122.995004,32.323198,-67.799695,49.893813"
    "&language=en"
)

mytweet = TwitterStream(
    api_key=api_key,
    api_secret=api_secret,
    access_token_key=access_token_key,
    access_token_secret=access_token_secret,
    debug=0,
    http_method="GET",
)
mytweet.fetchsamples(url=url, numberoftweets=25000)
output = mytweet.clean_tweets()
10), linkages={ ("self", "inbox"): ("LINKRESOLVE", "inbox"), ("LINKRESOLVE", "outbox"): ("self", "outbox"), ("LINKRESOLVE", "urlrequests"): ("LINKREQUESTER", "inbox"), ("LINKREQUESTER", "outbox"): ("LINKRESOLVE", "responses") }).activate() system = Graphline( CURRENTPROG=WhatsOn(proxy), REQUESTER=Requester( "all", dbuser, dbpass ), # Can set this for specific channels to limit Twitter requests whilst doing dev FIREHOSE=TwitterStream( username, password, proxy, True, 40 ), # Twitter API sends blank lines every 30 secs so timeout of 40 should be fine SEARCH=PeopleSearch(consumerkeypair, keypair, proxy), COLLECTOR=DataCollector(dbuser, dbpass), RAWCOLLECTOR=RawDataCollector(dbuser, dbpass), HTTPGETTER=HTTPGetter(proxy, "BBC R&D Grabber", 10), HTTPGETTERRDF=HTTPGetter(proxy, "BBC R&D Grabber", 10), TWOWAY=TwoWaySplitter(), ANALYSIS=LiveAnalysis(dbuser, dbpass), NLTKANALYSIS=LiveAnalysisNLTK(dbuser, dbpass), TWEETCLEANER=Pipeline( LINKER, RetweetFixer(), RetweetCorrector(dbuser, dbpass), TweetCleaner(['user_mentions', 'urls', 'hashtags'])), NLTKANALYSISFINAL=FinalAnalysisNLTK(dbuser, dbpass), TWEETCLEANERFINAL=Pipeline( LINKERFINAL, RetweetFixer(), RetweetCorrector(dbuser, dbpass),