예제 #1
0
 def build_stream(self):
     """Replace ``self.stream`` with a newly constructed Twitter ``Stream``.

     If a stream is already attached it is stopped through ``stop_query()``
     before the new one is created.  The new stream authenticates with
     ``self.username`` / ``self.password`` and is given this object as its
     listener.
     """
     # Identity test: a stream object with custom equality must still be
     # recognised as "present" and shut down.
     if self.stream is not None:
         self.stop_query()
         time.sleep(.01) # make sure old stream has time to disconnect
     self.stream = Stream(BasicAuthHandler(self.username, self.password),
                          self, # this object implements StreamListener
                          timeout=600, # reconnect if no messages in 600s
                          retry_count=20, # try reconnecting 20 times
                          retry_time=10.0, # wait 10s if no HTTP 200
                          snooze_time=1.0) # wait 1s if timeout in 600s
예제 #2
0
 def run_built_query(self, query_built, async):
     self.query = query_built
     self.query.handler.set_tuple_descriptor(self.query.get_tuple_descriptor())
     
     if self.query.source == StatusSource.TWITTER_FILTER:
         self.build_stream() 
         no_filter_exception = QueryException("You haven't specified any filters that can query Twitter.  Perhaps you want to query TWITTER_SAMPLE?")
         try:
             (follow_ids, track_words) = self.query.query_tree.filter_params()
             sys.stdout.flush()
             if (follow_ids == None) and (track_words == [None]):
                 raise no_filter_exception
             self.stream.filter(follow_ids, track_words, async)
         except NotImplementedError:
             raise no_filter_exception
     elif self.query.source == StatusSource.TWITTER_SAMPLE:
         self.build_stream() 
         self.stream.sample(None, async)
     elif self.query.source.startswith(QueryTokens.TABLE):
         self.stream = StreamFromDB(self, self.query.source)
         self.stream.read_from_table(async)
     elif self.query.source.startswith(QueryTokens.STREAM):
         self.stream = StreamFromStream(self, self.query.source)
         self.stream.fetch(async)
예제 #3
0
class QueryRunner(StreamListener):
    def __init__(self):
        """Register the default functions, initialise the StreamListener base
        and load the Twitter credentials.

        Credentials are read from ``settings.TWITTER_USERNAME`` and
        ``settings.TWITTER_PASSWORD``.  If either attribute is missing a
        notice is printed and the user is prompted via ``raw_input``.
        """
        register_default_functions()
        StreamListener.__init__(self)
        try:
            self.username = settings.TWITTER_USERNAME
            self.password = settings.TWITTER_PASSWORD
        except AttributeError:
            print "TWITTER_USERNAME and TWITTER_PASSWORD not defined in settings.py"
            # NOTE(review): the next line is corrupted in this listing.  The
            # '******' masking appears to have swallowed the rest of
            # __init__ and the beginning of the following method, so the
            # line is not valid Python as shown -- restore from the
            # original file before relying on it.
            self.username = raw_input('Twitter username: '******'Twitter password: '******'t specified any filters that can query Twitter.  Perhaps you want to query TWITTER_SAMPLE?")
            # NOTE(review): from here to the next `def` the statements match
            # the tail of run_built_query (source dispatch); they presumably
            # belong to that method, whose `def` line was lost in the
            # corrupted line above -- confirm against the original file.
            try:
                (follow_ids, track_words) = self.query.query_tree.filter_params()
                sys.stdout.flush()
                if (follow_ids == None) and (track_words == [None]):
                    raise no_filter_exception
                self.stream.filter(follow_ids, track_words, async)
            except NotImplementedError:
                raise no_filter_exception
        elif self.query.source == StatusSource.TWITTER_SAMPLE:
            self.build_stream() 
            self.stream.sample(None, async)
        elif self.query.source.startswith(QueryTokens.TABLE):
            self.stream = StreamFromDB(self, self.query.source)
            self.stream.read_from_table(async)
        elif self.query.source.startswith(QueryTokens.STREAM):
            self.stream = StreamFromStream(self, self.query.source)
            self.stream.fetch(async)
    def run_query(self, query_str, async):
        if isinstance(query_str, str):
            query_str = unicode(query_str, 'utf-8')
        query_built = self.query_builder.build(query_str)
        self.run_built_query(query_built, async)
    def stop_query(self):
        if self.stream != None:
            self.stream.disconnect()
            self.flush_statuses()
    def filter_statuses(self, statuses, query):
        """Run ``statuses`` through the query tree and hand the ones that pass
        to the query's handler.

        The query tree's ``filter`` is called with the flags
        ``(True, False)`` and must return a ``(passes, fails)`` pair; the
        second element is ignored here.
        """
        outcome = query.query_tree.filter(statuses, True, False)
        (accepted, _rejected) = outcome
        query.handler.handle_statuses(accepted)
    def flush_statuses(self):
        self.status_lock.acquire()
        if len(self.statuses) > 0:
            filter_func = lambda s=self.statuses, q=self.query: self.filter_statuses(s,q)
            t = Thread(target = filter_func)
            t.start()
            self.statuses = []
        self.status_lock.release()

    """ StreamListener methods """
    def on_status(self, status):
        """Buffer an incoming status and flush once a full batch is collected.

        The status's ``__dict__`` is wrapped in a ``Tuple`` (with its tuple
        descriptor set to ``None``) and appended to ``self.statuses``.  When
        the buffer length reaches ``self.query.handler.batch_size``,
        ``flush_statuses()`` is called.

        NOTE(review): ``flush_statuses()`` acquires ``self.status_lock``
        again while it is already held here, so the lock presumably has to
        be reentrant (e.g. ``threading.RLock``) -- confirm where it is
        created.
        """
        self.status_lock.acquire()
        try:
            t = Tuple()
            t.set_tuple_descriptor(None)
            t.set_data(status.__dict__)
            self.statuses.append(t)
            if len(self.statuses) >= self.query.handler.batch_size:
                self.flush_statuses()
        finally:
            # Release even if building or buffering the tuple raises, so
            # one bad status cannot block the stream callbacks that follow.
            self.status_lock.release()
    def on_error(self, status_code):
        """Report a stream error on stdout and return True."""
        message = 'An error has occured! Status code = %s' % status_code
        print(message)
        return True # keep stream alive
    def on_timeout(self):
        """Print a notice that the stream timed out; returns None."""
        notice = 'Snoozing Zzzzzz'
        print(notice)
    def on_limit(self, limit_data):
        """Print a rate-limit notice.

        ``limit_data`` is formatted with ``%d`` as the number of messages
        dropped since the query started.
        """
        notice = "Twitter rate-limited this query.  Since query start, Twitter dropped %d messages." % limit_data
        print(notice)