def start(self):
    super().start()
    if self.polling_interval().total_seconds() > 0:
        self._poll_job = Job(
            self.poll,
            self.polling_interval(),
            True
        )
        spawn(self.poll)
    else:
        self.logger.info("No poll job")
def start(self):
    super().start()
    self.counter = 0
    # Schedule interval simulations for the future
    self._job = Job(self._simulate, self.interval(), True)
    # But also simulate right away
    self._simulate()
def start(self):
    super().start()
    # Like crontab, check to run jobs every minute
    self._job = Job(self._cron, timedelta(minutes=1), True)
    # Run a cron cycle immediately, but in a new thread since it
    # might take some time and we don't want it to hold up start
    spawn(self._cron)
def _start_expiry_job(self):
    # Stop the existing job, if it exists
    self._stop_expiry_job()
    self._heartbeat_expiry_job = Job(
        self._no_heartbeat_response,
        timedelta(seconds=self._heartbeat_timeout),
        repeatable=False)
class Collector(object):

    """ A block mixin that provides collection/buffering functionality.

    By including this mixin, your block will have a `collect` property
    added to it which represents the amount of time to collect for. Any
    subsequent calls to notify signals will not be notified to the block
    router until the collection window is finished. This is useful for
    high volume blocks that wish to buffer their output.

    By setting the collect property to 0, notifying signals will happen
    immediately, as if this mixin wasn't even included.
    """

    collect = TimeDeltaProperty(
        title='Collect Timeout', default={"seconds": 1})

    def __init__(self):
        super().__init__()
        self._collect_job = None
        self._collect_lock = Lock()
        self._sigs_out = []

    def start(self):
        # Start the collection job, if we want to be collecting
        if self._are_we_collecting():
            self._collect_job = Job(self._dump_signals, self.collect, True)
        super().start()

    def stop(self):
        if self._collect_job:
            self._collect_job.cancel()
        super().stop()

    def notify_signals(self, signals):
        """Override the notify signals call to keep collecting"""
        if self._are_we_collecting():
            with self._collect_lock:
                self._sigs_out.extend(signals)
        else:
            super().notify_signals(signals)

    def _are_we_collecting(self):
        """Return True if we should be collecting signals"""
        return self.collect.total_seconds() > 0

    def _dump_signals(self):
        """Notify any signals we have collected this window.

        This gets called by the scheduled Job.
        """
        with self._collect_lock:
            if len(self._sigs_out):
                super().notify_signals(self._sigs_out)
            self._sigs_out = []
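A minimal usage sketch of the mixin above. The block name and base-class ordering here are assumptions for illustration, not taken from the source; the only point is that the mixin sits ahead of Block so it can intercept notify_signals.

# Hypothetical usage sketch: a high-volume block buffers its output for
# the configured collection window instead of notifying immediately.
class BufferedPassthrough(Collector, Block):

    def process_signals(self, signals):
        # With collect > 0, these signals are held in _sigs_out and
        # flushed by the scheduled Job once per window; with collect = 0
        # they pass straight through to the block router.
        self.notify_signals(signals)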
def start(self):
    super().start()
    self._authorize()
    self._construct_url()
    self._search_job = Job(
        self._search_tweets,
        self.interval(),
        False,
        self._url
    )
def start(self):
    now = datetime.utcnow()
    latest = self._last_emission or now
    delta = self.interval - (now - latest)
    self._emission_job = Job(
        self.emit,
        delta,
        False,
        reset=True
    )
class CounterFast(Block):

    version = VersionProperty("0.1.1")
    frequency = ObjectProperty(
        Frequency, title="Report Frequency", default=Frequency())

    def configure(self, context):
        super().configure(context)
        self._cumulative_count = 0
        self._cumulative_count_lock = Lock()
        if self.frequency().enabled():
            self._tracker = FrequencyTracker(
                total_seconds(self.frequency().averaging_interval()))

    def start(self):
        if self.frequency().enabled():
            self._job = Job(self.report_frequency,
                            self.frequency().report_interval(),
                            True)

    def process_signals(self, signals):
        count = len(signals)
        self.logger.debug("Ready to process {} signals".format(count))
        with self._cumulative_count_lock:
            if self.frequency().enabled():
                self._tracker.record(count)
            self._cumulative_count += count
            cumulative_count = self._cumulative_count
        signal = Signal({
            "count": count,
            "cumulative_count": cumulative_count,
        })
        self.notify_signals([signal])

    def report_frequency(self):
        self.logger.debug("Reporting signal frequency")
        signal = Signal({"count_frequency": self._tracker.get_frequency()})
        self.notify_signals([signal])

    def stop(self):
        try:
            self._job.cancel()
        except AttributeError:
            pass
        super().stop()

    def reset(self):
        with self._cumulative_count_lock:
            self._cumulative_count = 0
        return True

    def value(self):
        return self._cumulative_count
def process_signals(self, signals):
    self.for_each_group(self.process_group, signals)
    # Start a new job if property is checked and there is no active job
    if self.signal_start() and not self._active_job:
        self._emission_job = Job(
            self._emit_job,
            self.interval(),
            False,
            group=None,
            reset=False,
        )
        self._active_job = True  # Added flag for active job
def start(self):
    # Start emission job on service start if bool property is not checked
    if self.interval() and not self.signal_start():
        now = datetime.utcnow()
        latest = self._last_emission or now
        delta = self.interval() - (now - latest)
        self._emission_job = Job(
            self._emit_job,
            delta,
            False,
            group=None,
            reset=True,
        )
class SafeTrigger():

    """ Guarantees notifying signals every interval, regardless of count """

    interval = TimeDeltaProperty(title='Interval',
                                 default={'seconds': 1},
                                 order=0)
    max_count = IntProperty(title='Max Count', default=1, order=1)

    def __init__(self):
        super().__init__()
        self._job = None
        self.stop_event = Event()
        self.signal_lock = Lock()

    def start(self):
        super().start()
        self._job = Job(self._emit, self.interval(), True)
        # Run an emit cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._emit)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        self.stop_event.set()
        super().stop()

    def _emit(self):
        """ Called every *interval* to generate then notify the signals """
        self.logger.debug("New generation cycle requested")
        count = 0
        signals = []

        # Stop any currently running simulator threads
        self.stop_event.set()
        # We only want one simulator thread simulating at a time
        with self.signal_lock:
            # Ok, we're running, so clear the event and wait
            self.stop_event.clear()

            self.logger.debug("Starting generation...")
            while count < self.max_count() and not self.stop_event.is_set():
                signals.extend(self.generate_signals(1))
                count += 1

        self.logger.debug("Notifying {} signals".format(len(signals)))
        self.notify_signals(signals)
def _start_emit_job(self):
    if self.interval.total_seconds() >= 0:
        self._emit_job = Job(
            self.emit,
            self.interval,
            True
        )
def _retry_poll(self, paging=False):
    """ Helper method to schedule polling retries. """
    if self._poll_job is not None:
        self._poll_job.cancel()
        self._poll_job = None
    if self._retry_count < self.retry_limit():
        self.logger.debug("Retrying the polling job...")
        self._retry_count += 1
        self._retry_job = Job(self.poll,
                              self._retry_interval,
                              False,
                              paging=paging,
                              in_retry=True)
        self._update_retry_interval()
    else:
        self.logger.error("Out of retries. "
                          "Aborting and changing status to Error.")
        status_signal = BlockStatusSignal(
            RunnerStatus.error, 'Out of retries.')

        # Leaving source for backwards compatibility
        # In the future, you will know that a status signal is a block
        # status signal when it contains service_name and name
        #
        # TODO: Remove when source gets added to status signals in nio
        setattr(status_signal, 'source', 'Block')

        self.notify_management_signal(status_signal)
def _schedule_signal_expiration_job(self, group, input_id):
    """ Schedule expiration job, cancelling existing job first """
    if self._expiration_jobs[group][input_id]:
        self._expiration_jobs[group][input_id].cancel()
    self._expiration_jobs[group][input_id] = Job(
        self._signal_expiration_job, self.expiration(), False,
        group, input_id)
def start(self):
    super().start()
    # If we have configured a backup interval, set up the job to
    # periodically save to persistence
    if self.backup_interval.total_seconds() > 0:
        self._backup_job = Job(self._save, self.backup_interval, True)
def start(self):
    super().start()
    self._publisher.open()
    # create publishing repeatable job
    self._job = Job(
        self._deliver_stats,
        timedelta(seconds=self._stats_timeout),
        True
    )
class SafeTrigger():

    """ Guarantees notifying signals every interval, regardless of count """

    interval = TimeDeltaProperty(title='Interval', default={'seconds': 1})
    max_count = IntProperty(title='Max Count', default=1)

    def __init__(self):
        super().__init__()
        self._job = None
        self.stop_event = Event()
        self.signal_lock = Lock()

    def start(self):
        super().start()
        self._job = Job(self._emit, self.interval, True)
        # Run an emit cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._emit)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        self.stop_event.set()
        super().stop()

    def _emit(self):
        """ Called every *interval* to generate then notify the signals """
        self._logger.debug("New generation cycle requested")
        count = 0
        signals = []

        # Stop any currently running simulator threads
        self.stop_event.set()
        # We only want one simulator thread simulating at a time
        with self.signal_lock:
            # Ok, we're running, so clear the event and wait
            self.stop_event.clear()

            self._logger.debug("Starting generation...")
            while count < self.max_count and not self.stop_event.is_set():
                signals.extend(self.generate_signals(1))
                count += 1

        self._logger.debug("Notifying {} signals".format(len(signals)))
        self.notify_signals(signals)
def _schedule_timeout_job(self, signal, key, interval, repeatable):
    self.logger.debug("Scheduling new timeout job for group {}, "
                      "interval={} repeatable={}".format(
                          key, interval, repeatable))
    self._jobs[key][interval] = Job(self._timeout_job,
                                    interval,
                                    repeatable,
                                    signal,
                                    key,
                                    interval)
    if repeatable:
        self._repeatable_jobs[key][interval] = signal
def start(self):
    super().start()
    self._start_emit_job()
    self._backup_job = Job(
        self._backup,
        self.backup_interval,
        True
    )
def start_heartbeats(self):
    """ Start a job which will periodically send heartbeats to the server.

    This method will also start a job that will wait for responses in
    case the server doesn't respond in time.
    """
    # Since we are starting a new heartbeat cycle, cancel anything
    # that was outstanding
    self.stop_heartbeats()
    # Start a job that will send heartbeats indefinitely
    self._heartbeat_job = Job(self._heartbeat_func,
                              timedelta(seconds=self._heartbeat_interval),
                              repeatable=True)
    # Also start a job that will wait for heartbeat timeouts
    self._start_expiry_job()
class IntervalTrigger():

    """Generate signals at a regular interval up to total_signals"""

    total_signals = IntProperty(title="Total Number of Signals", default=-1,
                                order=4)
    interval = TimeDeltaProperty(title='Interval', default={'seconds': 1},
                                 order=0)

    def __init__(self):
        super().__init__()
        self.counter = None
        self._job = None

    def start(self):
        super().start()
        self.counter = 0
        # Schedule interval simulations for the future
        self._job = Job(self._simulate, self.interval(), True)
        # But also simulate right away
        self._simulate()

    def _simulate(self):
        sigs = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(sigs, list):
            sigs = list(sigs)
        # Add however many signals were generated (in case multiple
        # signals mixin was used) to the counter and notify them
        self.counter += len(sigs)
        # If we went past total_signals, trim the excess
        # (self.counter - self.total_signals() signals) off the end
        if self.counter > self.total_signals() and self.total_signals() >= 0:
            sigs_to_remove = self.counter - self.total_signals()
            sigs = sigs[:-1 * sigs_to_remove]
        self.notify_signals(sigs)
        if self.total_signals() > 0 and \
                self.counter >= self.total_signals():
            self._job.cancel()

    def stop(self):
        """ Stop the simulator thread. """
        self._job.cancel()
        super().stop()
def _start_emit_job(self):
    ''' Start job that emits signals from the queue '''
    if self.interval() and self.interval().total_seconds() > 0:
        # only schedule if the interval is a positive number
        self._emit_job = Job(
            self.emit,
            self.interval(),
            True
        )
def _emit_job(self, group, reset=False):
    self.logger.debug('Emitting signals')
    if reset:
        self._emission_job.cancel()
        self._emission_job = Job(
            self._emit_job,
            self.interval(),
            True,
            group=group,
        )
    self._last_emission = datetime.utcnow()
    signals = self._get_emit_signals(group)
    self._active_job = False
    if signals:
        self.logger.debug('Notifying {} signals'.format(len(signals)))
        self.notify_signals(signals)
    else:
        self.logger.debug('No signals to notify')
def start(self):
    super().start()
    self._authorize()
    self._start()
    spawn(self._run_stream)
    self._notify_job = Job(
        self._notify_results,
        self.notify_freq(),
        True
    )
def start(self):
    super().start()
    if self.polling_interval().total_seconds() > 0:
        self._poll_job = Job(
            self.poll,
            self.polling_interval(),
            True
        )
        spawn(self.poll)
    else:
        self.logger.info("No poll job")
def handle_reconnect(self):
    self._disconnect_client()
    # Don't need to reconnect if we are stopping
    if self._stopping:
        return
    if self._reconnection_job is not None:
        self._logger.debug("Reconnection job already scheduled")
        return
    self._logger.warning(
        "Attempting to reconnect in {0} seconds.".format(self._timeout))
    self._reconnection_job = Job(
        self._connect_to_gateway,
        timedelta(seconds=self._timeout),
        repeatable=False)
def emit(self, reset=False):
    self._logger.debug('Emitting signals')
    if reset:
        self._emission_job.cancel()
        self._emission_job = Job(
            self.emit,
            self.interval,
            True
        )
    self._last_emission = datetime.utcnow()
    signals = self._get_emit_signals()
    if signals:
        self._logger.debug('Notifying {} signals'.format(len(signals)))
        self.notify_signals(signals)
    elif self.timeout:
        self._logger.debug('Notifying timeout signal')
        self.notify_signals([Signal({self.timeout_attr: True})])
    else:
        self._logger.debug('No signals to notify')
    if self.use_persistence:
        self.persistence.store('last_emission', self._last_emission)
        self._backup()
def _epilogue(self):
    """ This can be overridden in user-defined blocks.

    Defines behavior after a query has been fully processed, when we are
    ready for the next query. That is, when paging is done and retries
    are cleared.
    """
    if self.polling_interval().total_seconds() > 0:
        self._poll_job = self._poll_job or Job(
            self.poll, self.polling_interval(), True)
    self._increment_idx()
    if self.queries():
        self.logger.debug("Preparing to query for: %s" % self.current_query)
def _search_tweets(self, url):
    rsp = requests.get(url, auth=self._auth)
    status = rsp.status_code
    if status == 200:
        data = rsp.json()
        tweets = data['statuses']
        next_results = data['search_metadata'].get('next_results')
        self.notify_signals([Signal(t) for t in tweets])
        if next_results is not None:
            self._search_tweets(
                "{0}{1}".format(SEARCH_URL, next_results)
            )
        else:
            self.logger.debug("Scheduling next search...")
            self._search_job = Job(
                self._search_tweets,
                self.interval(),
                False,
                self._url
            )
    else:
        self.logger.error(
            "Twitter search failed with status {0}".format(status))
def __get_publisher(self, topic, ttl):
    with self._cache_lock:
        publisher, prev_job = self._cache[topic]
        if prev_job is not None:
            prev_job.cancel()
        job = (Job(
            self.__close_publisher,
            ttl,
            False,
            topic,
        ) if ttl.total_seconds() >= 0 else None)
        self._cache[topic] = (publisher, job)
        return publisher
def _init_access_token(self):
    try:
        self._access_token = self.get_access_token('openid email')
        self.logger.debug("Obtained access token: {}".format(
            self._access_token))
        if self._reauth_job:
            self._reauth_job.cancel()
        # Remember to reauthenticate at a certain point if it's configured
        if self.reauth_interval().total_seconds() > 0:
            self._reauth_job = Job(self._init_access_token,
                                   self.reauth_interval(),
                                   False)
    except OAuth2Exception:
        self.logger.exception('Error obtaining access token')
        self._access_token = None
def start_heartbeats(self):
    """ Start a job which will periodically send heartbeats to the server.

    This method will also start a job that will wait for responses in
    case the server doesn't respond in time.
    """
    # Since we are starting a new heartbeat cycle, cancel anything
    # that was outstanding
    self.stop_heartbeats()
    # Start a job that will send heartbeats indefinitely
    self._heartbeat_job = Job(
        self._heartbeat_func,
        timedelta(seconds=self._heartbeat_interval),
        repeatable=True)
    # Also start a job that will wait for heartbeat timeouts
    self._start_expiry_job()
def process_group_signals(self, signals, group, input_id='repeat'):
    if input_id == 'cancel':
        self._cancel_group_job(group)
        return
    if len(signals) == 0:
        return
    signal = signals[-1]
    repeats_remaining = self.max_repeats(signal)
    with self._group_locks[group]:
        self._cancel_group_job(group)
        if repeats_remaining == 0:
            # They don't want to repeat, ignore
            return
        self.logger.debug("Setting up repeat for group {}".format(group))
        self.notifications[group]['signal'] = signal
        self.notifications[group]['num_remaining'] = repeats_remaining
        self.notifications[group]['job'] = Job(target=self.notify_group,
                                               delta=self.interval(signal),
                                               repeatable=True,
                                               group=group)
def start(self):
    super().start()
    # use _start_time if it was loaded from persistence
    self._start_time = self._start_time or _time()
    self._averaging_seconds = self.averaging_interval().total_seconds()
    self._job = Job(self.report_frequency, self.report_interval(), True)
class Buffer(Block):

    interval = TimeDeltaProperty(title='Buffer Interval')
    interval_duration = TimeDeltaProperty(title='Interval Duration')
    timeout = BoolProperty(title='Buffer Timeout', default=False)
    timeout_attr = StringProperty(title='Timeout Attributes',
                                  visible=False,
                                  default="timeout")
    use_persistence = BoolProperty(title='Use Persistence?',
                                   visible=False,
                                   default=False)

    def __init__(self):
        super().__init__()
        self._last_emission = None
        self._cache = defaultdict(list)
        self._cache_lock = Lock()
        self._emission_job = None

    def configure(self, context):
        super().configure(context)
        if self.use_persistence:
            self._last_emission = self.persistence.load('last_emission')
            self._cache = self.persistence.load('cache') or defaultdict(list)
            # For backwards compatibility, make sure cache is a dict.
            if not isinstance(self._cache, dict):
                self._cache = defaultdict(list)

    def start(self):
        now = datetime.utcnow()
        latest = self._last_emission or now
        delta = self.interval - (now - latest)
        self._emission_job = Job(
            self.emit,
            delta,
            False,
            reset=True
        )

    def stop(self):
        if self.use_persistence:
            self._backup()

    def emit(self, reset=False):
        self._logger.debug('Emitting signals')
        if reset:
            self._emission_job.cancel()
            self._emission_job = Job(
                self.emit,
                self.interval,
                True
            )
        self._last_emission = datetime.utcnow()
        signals = self._get_emit_signals()
        if signals:
            self._logger.debug('Notifying {} signals'.format(len(signals)))
            self.notify_signals(signals)
        elif self.timeout:
            self._logger.debug('Notifying timeout signal')
            self.notify_signals([Signal({self.timeout_attr: True})])
        else:
            self._logger.debug('No signals to notify')
        if self.use_persistence:
            self.persistence.store('last_emission', self._last_emission)
            self._backup()

    def _get_emit_signals(self):
        with self._cache_lock:
            now = int(time())
            signals = []
            if self.interval_duration:
                # Remove old signals from cache.
                old = now - int(self.interval_duration.total_seconds())
                self._logger.debug(
                    'Removing signals from cache older than {}'.format(old))
                cache_times = sorted(self._cache.keys())
                for cache_time in cache_times:
                    if cache_time < old:
                        del self._cache[cache_time]
                    else:
                        break
            for cache in self._cache:
                signals.extend(self._cache[cache])
            if not self.interval_duration:
                # Clear cache every time if duration is not set.
                self._logger.debug('Clearing cache of signals')
                self._cache = defaultdict(list)
            return signals

    def process_signals(self, signals):
        with self._cache_lock:
            now = int(time())
            self._cache[now].extend(signals)

    def _backup(self):
        self.persistence.store('cache', self._cache)
        self.persistence.save()
def _connect_to_streaming(self):
    """Set up a connection to the Twitter Streaming API.

    This method will build the connection and save it in self._stream.
    On a valid connection, it will reset the reconnection and monitoring
    jobs.

    Returns:
        success (bool): Whether or not the connection succeeded. If any
            errors occur during connection, it will not schedule the
            reconnects, but rather just return False.
    """
    try:
        self._conn = http.client.HTTPSConnection(
            host=self.streaming_host, timeout=45)
        req_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': '*/*'
        }
        conn_url = 'https://{0}/{1}'.format(
            self.streaming_host, self.streaming_endpoint)
        # get the signed request with the proper oauth creds
        req = self._get_oauth_request(conn_url, self.get_params())
        self.logger.debug("Connecting to {0}".format(conn_url))
        if self.get_request_method() == "POST":
            self._conn.request(self.get_request_method(),
                               conn_url,
                               body=req.to_postdata(),
                               headers=req_headers)
        else:
            self._conn.request(self.get_request_method(),
                               req.to_url(),
                               headers=req_headers)
        response = self._conn.getresponse()
        if response.status != 200:
            self.logger.warning(
                'Status: {} returned from twitter: {}'.format(
                    response.status, response.read()))
            return False
        else:
            self.logger.debug('Connected to Streaming API Successfully')
            # Clear any reconnects we had
            if self._rc_job is not None:
                self.logger.error("We were reconnecting, now we're done!")
                self._rc_job.cancel()
                self._rc_delay = timedelta(seconds=1)
                self._rc_job = None
            self._last_rcv = datetime.utcnow()
            self._monitor_job = Job(
                self._monitor_connection,
                self.rc_interval(),
                True
            )
            self._stream = response
            # Return true, we are connected!
            return True
    except Exception as e:
        self.logger.error('Error opening connection : {0}'.format(e))
        return False
class TwitterStreamBlock(Block): """ A parent block for communicating with the Twitter Streaming API. Properties: fields (list(str)): Outgoing signals will pull these fields from incoming tweets. When empty/unset, all fields are included. notify_freq (timedelta): The interval between signal notifications. creds: Twitter app credentials, see above. Defaults to global settings. rc_interval (timedelta): Time to wait between receipts (either tweets or hearbeats) before attempting to reconnect to Twitter Streaming. """ notify_freq = TimeDeltaProperty(default={"seconds": 2}, title='Notification Frequency') creds = ObjectProperty(TwitterCreds, title='Credentials', default=TwitterCreds()) rc_interval = TimeDeltaProperty(default={"seconds": 90}, title='Reconnect Interval') streaming_host = None streaming_endpoint = None verify_url = 'https://api.twitter.com/1.1/account/verify_credentials.json' def __init__(self): super().__init__() self._result_signals = defaultdict(list) self._result_lock = defaultdict(Lock) self._lock_lock = Lock() self._stop_event = Event() self._stream = None self._last_rcv = datetime.utcnow() self._limit_count = 0 # Jobs to run throughout execution self._notify_job = None # notifies signals self._monitor_job = None # checks for heartbeats self._rc_job = None # attempts reconnects self._rc_delay = timedelta(seconds=1) def start(self): super().start() self._authorize() self._start() spawn(self._run_stream) self._notify_job = Job( self._notify_results, self.notify_freq(), True ) def _start(self): """ Override in blocks that need to run code before start """ pass def stop(self): self._stop_event.set() self._notify_job.cancel() if self._monitor_job is not None: self._monitor_job.cancel() if self._rc_job is not None: self._rc_job.cancel() super().stop() def _run_stream(self): """ The main thread for the Twitter block. Reads from Twitter streaming, parses and queues results. """ # If we had an existing stream, close it. We will open our own if self._stream: self._stream.close() self._stream = None # This is a new stream so reset the limit count self._limit_count = 0 # Try to connect, if we can't, don't start streaming, but try reconnect if not self._connect_to_streaming(): self._setup_reconnect_attempt() return while(1): if self._stop_event.is_set(): break line = None try: line = self._read_line() except Exception as e: # Error while getting the tweet, this probably indicates a # disconnection so let's try to reconnect self.logger.error("While streaming: %s" % str(e)) self._setup_reconnect_attempt() break if line and len(line): self._record_line(line) def _read_line(self): """Read the next line off of the stream. This will first read the length of the line, then read the next N bytes based on the length. It will return the read line if it reads successfully. Otherwise, returns None. Raises: Exception: if there was an error reading bytes - this will most likely indicate a disconnection """ # build the length buffer buf = bytes('', 'utf-8') while not buf or buf[-1] != ord('\n'): bytes_read = self._read_bytes(1) if bytes_read: buf += bytes_read else: raise Exception("No bytes read from stream") # checking to see if it's a 'keep-alive' if len(buf) <= 2: # only recieved \r\n so it is a keep-alive. move on. self.logger.debug('Received a keep-alive signal from Twitter.') self._last_rcv = datetime.utcnow() return None return self._read_bytes(int(buf)) def _read_bytes(self, n_bytes): """Read N bytes off of the current stream. 
Returns: len (int): number of bytes actually read - None if no bytes read """ bytes_read = self._stream.read(n_bytes) return bytes_read if len(bytes_read) > 0 else None def get_params(self): """ Return URL connection parameters here """ return {} def _connect_to_streaming(self): """Set up a connection to the Twitter Streaming API. This method will build the connection and save it in self._stream. On a valid connection, it will reset the reconnection and monitoring jobs Returns success (bool): Whether or not the connection succeeded. If any errors occur during connection, it will not schedule the reconnects, but rather just return False. """ try: self._conn = http.client.HTTPSConnection( host=self.streaming_host, timeout=45) req_headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': '*/*' } conn_url = 'https://{0}/{1}'.format( self.streaming_host, self.streaming_endpoint) # get the signed request with the proper oauth creds req = self._get_oauth_request(conn_url, self.get_params()) self.logger.debug("Connecting to {0}".format(conn_url)) if self.get_request_method() == "POST": self._conn.request(self.get_request_method(), conn_url, body=req.to_postdata(), headers=req_headers) else: self._conn.request(self.get_request_method(), req.to_url(), headers=req_headers) response = self._conn.getresponse() if response.status != 200: self.logger.warning( 'Status: {} returned from twitter: {}'.format( response.status, response.read())) return False else: self.logger.debug('Connected to Streaming API Successfully') # Clear any reconnects we had if self._rc_job is not None: self.logger.error("We were reconnecting, now we're done!") self._rc_job.cancel() self._rc_delay = timedelta(seconds=1) self._rc_job = None self._last_rcv = datetime.utcnow() self._monitor_job = Job( self._monitor_connection, self.rc_interval(), True ) self._stream = response # Return true, we are connected! return True except Exception as e: self.logger.error('Error opening connection : {0}'.format(e)) return False def _setup_reconnect_attempt(self): """Add the reconnection job and double the delay for the next one""" if self._monitor_job is not None: self._monitor_job.cancel() self.logger.debug("Reconnecting in %d seconds" % self._rc_delay.total_seconds()) self._rc_job = Job(self._run_stream, self._rc_delay, False) self._rc_delay *= 2 def get_request_method(self): return "GET" def _get_oauth_request(self, conn_url, request_params): """This function uses the oauthCreds passed from the transducer to sign the request. """ request_params['oauth_version'] = '1.0' request_params['oauth_nonce'] = oauth.generate_nonce() request_params['oauth_timestamp'] = int(time.time()) req = oauth.Request(method=self.get_request_method(), url=conn_url, parameters=request_params) req.sign_request( signature_method=oauth.SignatureMethod_HMAC_SHA1(), consumer=oauth.Consumer( self.creds().consumer_key(), self.creds().app_secret()), token=oauth.Token( self.creds().oauth_token(), self.creds().oauth_token_secret()) ) return req def _record_line(self, line): """ Decode the line and add it to the end of the list """ try: # reset the last received timestamp self._last_rcv = datetime.utcnow() data = json.loads(line.decode('utf-8')) self.create_signal(data) except Exception as e: self.logger.error("Could not parse line: %s" % str(e)) def create_signal(self, data): """ Override this method in the block implementation Append the new Signal to appropriate list in the dictionary `self._result_signals`, where the key is the name of the block output. 
Below is an example implementation, meant to be overridden. """ self.logger.debug("Default message type") data = self.filter_results(data) if data: with self._get_result_lock('default'): self._result_signals['default'].append(Signal(data)) def _get_result_lock(self, key): with self._lock_lock: return self._result_lock[key] def filter_results(self, data): return data def _notify_results(self): """Method to be called from the notify job, will notify any tweets that have been buffered by the block, then clear the buffer. """ for output in self._result_signals: with self._get_result_lock(output): signals = self._result_signals[output] if signals: self.notify_signals(signals, output) self._result_signals[output] = [] def _monitor_connection(self): """ Scheduled to run every self.rc_interval. Makes sure that some data has been received in the last self.rc_interval. """ current_time = datetime.utcnow() time_since_data = current_time - self._last_rcv if time_since_data > self.rc_interval(): self.logger.warning("No data received, we might be disconnected") self._setup_reconnect_attempt() def _authorize(self): """ Prepare the OAuth handshake and verify. """ try: auth = OAuth1(self.creds().consumer_key(), self.creds().app_secret(), self.creds().oauth_token(), self.creds().oauth_token_secret()) resp = requests.get(self.verify_url, auth=auth) if resp.status_code != 200: raise Exception("Status %s" % resp.status_code) except Exception: self.logger.exception("Authentication Failed for consumer key: %s" % self.creds().consumer_key())
class Queue(GroupBy, Block): """ Queue block. A NIO block for queueing up signals. As signals pile up, the Queue block releases a configurable number at a configurable interval. If incoming signals would overflow the queue, signals are popped off the front as needed. If a 'group_by' string is configured, incoming signals are divided and grouped by the value of that attribute. The configured capacity applies to *each* such queue, not the block as a whole. """ version = StringProperty(default='1.0') interval = TimeDeltaProperty(title='Notification Interval') backup_interval = TimeDeltaProperty(title='Backup Interval', visible=False, default={"minutes": 10}) capacity = IntProperty(default=100, title='Capacity') group_by = ExpressionProperty(default='null', attr_default='null', title='Group By') chunk_size = IntProperty(default=1, title='Chunk Size') reload = BoolProperty(default=False, title='Auto-Reload?') uniqueness = ExpressionProperty(title='Queue Uniqueness Expression', attr_default=None) update = BoolProperty(title='Update Non-Unique Signals', default=False) def __init__(self): super().__init__() self._queues = defaultdict(list) self._queue_locks = { 'null': Lock() } self._meta_lock = Lock() self._emit_job = None self._backup_job = None def configure(self, context): super().configure(context) self._load() def start(self): super().start() self._start_emit_job() self._backup_job = Job( self._backup, self.backup_interval, True ) def stop(self): if self._emit_job is not None: self._emit_job.cancel() self._backup_job.cancel() self._backup() def process_signals(self, signals): self._logger.debug("Processing {} signals".format(len(signals))) self.for_each_group(self._push_group, signals) def pop(self, grp="null"): ''' Remove the top n signals from the specified queue. Args: grp (str): The queue from which to pop. count (int): The number of signals to pop off. reload (bool): If True, put popped signals back on queue. Returns: top_n (list): 'Count' signals from the front of the queue. ''' count = self.chunk_size reload = self.reload # lock the queue we're popping from self._logger.debug("pop: {} {} {}".format(grp, count, reload)) with self._get_lock(grp): # check out the front of the queue top_n = self._queues[grp][0:count] self._logger.debug( "Removing %d signals from %s_queue" % (len(top_n), grp)) self._queues[grp][:] = self._queues[grp][len(top_n):] # If reloading, put signal back on queue. if reload: self._logger.debug("Reloading {}_queue".format(grp)) self._queues[grp].extend(top_n) return top_n def push(self, signal, grp): ''' Add a signal to the back of the queue. Args: signal (Signal): The signal to add. grp (str): Group to add signal to. Returns: None ''' queue = self._queues[grp] # check for uniqueness if property is set try: unique_val = self.uniqueness(signal) self._logger.debug( "Testing uniqueness for signal: {}".format(unique_val)) except Exception as e: unique_val = None self._logger.warning( "Uniqueness expression failed. 
Using value of None.") if unique_val is not None: for idx, sig in enumerate(queue): try: sig_val = self.uniqueness(sig) except Exception as e: sig_val = None if sig_val == unique_val: self._logger.debug( "Signal {} already in {}_queue".format(sig_val, grp) ) if self.update: queue[idx] = signal return # pop one off the top of that queue if it's at capacity if len(queue) == self.capacity: self._logger.debug( "Pushing signal and capactity of {}_signal is full: {}".format( grp, self.capacity ) ) queue.pop(0) self._logger.debug("Appending signal to {}_queue".format(grp)) queue.append(signal) def _push_group(self, signals, group): # lock the queue before appending with self._get_lock(group): for signal in signals: self.push(signal, group) def _get_lock(self, grp="null"): ''' Returns the lock for a particular queue. Note that we're maintaining a synchronized dictionary of locks alongside our dict of queues. ''' with self._meta_lock: self._queue_locks[grp] = self._queue_locks.get(grp, Lock()) return self._queue_locks[grp] def _start_emit_job(self): if self.interval.total_seconds() >= 0: self._emit_job = Job( self.emit, self.interval, True ) def emit(self): ''' Notify the configured number of signals from the front of the queue. ''' signals_to_notify = self.for_each_group(self.pop) if signals_to_notify: self._logger.debug( "Notifying {} signals".format(len(signals_to_notify)) ) self.notify_signals(signals_to_notify) def _load(self): prev_queues = self.persistence.load('queues') # if persisted dictonary is not defaultdict, convert it if prev_queues: self._queues = defaultdict(list, prev_queues) # build _groups for groupby mixin self._groups = list(self._queues.keys()) def _backup(self): ''' Persist the current state of the queues using the persistence module. ''' # store the serialized signals and save to disk # grab the meta_lock so nobody else can interact with the queues during # serialization self._logger.debug("Persistence: backing up to file") self._meta_lock.acquire() self.persistence.store("queues", self._queues) self._meta_lock.release() self.persistence.save() def _inspect_group(self, response, group): response_group = {'count': 0, 'signals': []} query = response.get('query', '{{ True }}') ignored_signals = [] for signal in self._queues.get(group, []): try: eval = Evaluator(query, None).evaluate(signal) except: eval = False if eval: response_group['signals'].append(json.loads(json.dumps(signal.to_dict(), indent=4, separators=(',', ': '), default=str))) response_group['count'] += 1 response['count'] +=1 else: ignored_signals.append(signal) response['groups'][group] = response_group return response, ignored_signals def view(self, query, group): ''' Command to view the signals that are in the queue. If no group parameter is specified, all queues are returned. ''' self._logger.debug("Command: view") response = {} response['query'] = query response['group'] = group response['count'] = 0 response['groups'] = {} if group and group in self._queues: # if group exists, return only the specified group self._view_group(group, response) elif not group: # if no group is specifed in params return all groups self.for_each_group(self._view_group, kwargs={'response': response}) return response def _view_group(self, group, response): with self._get_lock(group): response, _ = self._inspect_group(response, group) def remove(self, query, group): ''' Remove signals from *group* where *query* is True. Signals are not notified. 
''' self._logger.debug("Command: remove") response = {} response['query'] = query response['group'] = group response['count'] = 0 response['groups'] = {} if group and group in self._queues: # if group exists, remove from only only the specified group self._remove_from_group(group, response, query) elif not group: # if no group is specifed in params return all groups self.for_each_group(self._remove_from_group, kwargs={'response': response, 'query': query}) return response def _remove_from_group(self, group, response, query): with self._get_lock(group): response, signals = self._inspect_group(response, group) # signals that don't match the query stay in the queue. self._queues[group] = signals def update_props(self, props): ''' Updates the *interval* property. The next scheduled emit job with be canceled and a new repeatable emit job is started. ''' self._logger.debug("Command: update_props") response = {} if props is None or not isinstance(props, dict): response['message'] = \ "'props' needs to be a dictionary: {}".format(props) return response # Update *interval*. interval = props.get('interval') if interval and isinstance(interval, dict) and \ (interval.get('days') or interval.get('seconds') \ or interval.get('microseconds')): days = interval.get('days', 0) seconds = interval.get('seconds', 0) microseconds = interval.get('microseconds', 0) interval = timedelta(days, seconds, microseconds) response['interval'] = interval response['prev_interval'] = self.interval # cancel emit job and restart with new interval if self._emit_job is not None: self._emit_job.cancel() self._start_emit_job() self.interval = interval self._logger.info('Interval has been updated to {}'.format(interval)) elif interval: response['message'] = "'interval' needs to be a timedelta dict: {}".format(interval) return response
def start(self):
    super().start()
    self._start_emit_job()
    self._backup_job = Job(self._backup, self.backup_interval, True)
class SignalRate(GroupBy, Persistence, Block):

    report_interval = TimeDeltaProperty(default={"seconds": 1},
                                        title="Report Interval")
    averaging_interval = TimeDeltaProperty(default={"seconds": 5},
                                           title="Averaging Interval")
    version = VersionProperty("0.1.1")

    def __init__(self):
        super().__init__()
        self._signal_counts = defaultdict(deque)
        self._signals_lock = Lock()
        self._job = None
        self._start_time = None
        self._averaging_seconds = None

    def persisted_values(self):
        """ Overridden from persistence mixin """
        return ['_start_time', '_signal_counts']

    def configure(self, context):
        super().configure(context)
        # This is just for backwards compatibility with persistence
        if self._signal_counts.default_factory == list:
            self._signal_counts.default_factory = deque
            for group in self._signal_counts:
                self._signal_counts[group] = deque(self._signal_counts[group])

    def start(self):
        super().start()
        # use _start_time if it was loaded from persistence
        self._start_time = self._start_time or _time()
        self._averaging_seconds = self.averaging_interval().total_seconds()
        self._job = Job(self.report_frequency, self.report_interval(), True)

    def process_signals(self, signals, input_id='default'):
        # Record the count for each group in this list of signals
        self.for_each_group(self.record_count, signals)

    def record_count(self, signals, group):
        """ Save the time and the counts for each group received """
        with self._signals_lock:
            self._signal_counts[group].append((_time(), len(signals)))

    def report_frequency(self):
        signals = []
        self.for_each_group(self.get_frequency, sigs_out=signals)
        self.logger.debug("Current counts: {}".format(self._signal_counts))
        if signals:
            self.notify_signals(signals)

    def get_frequency(self, group, sigs_out):
        """ Get the frequency for a group and add it to sigs_out """
        with self._signals_lock:
            ctime = _time()
            self._signal_counts[group] = self.trim_old_signals(
                self._signal_counts[group], ctime)
            signals = copy(self._signal_counts[group])
        # Add up all of our current counts
        total_count = sum(grp[1] for grp in signals)
        # If we haven't reached a full period, divide by elapsed time
        rate = total_count / min(ctime - self._start_time,
                                 self._averaging_seconds)
        sigs_out.append(Signal({"group": group, "rate": rate}))

    def trim_old_signals(self, signal_counts, ctime):
        """ Take some signal counts and get rid of old ones """
        while len(signal_counts) and \
                ctime - signal_counts[0][0] >= self._averaging_seconds:
            signal_counts.popleft()
        return signal_counts

    def stop(self):
        if self._job:
            self._job.cancel()
        super().stop()
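A small worked example of the rate computation in get_frequency above; the numbers are illustrative, not from the source.

# Counts recorded inside the 5-second averaging window sum to 30 signals:
total_count = 30
averaging_seconds = 5.0
elapsed = 12.0  # the block has been running longer than one full window
rate = total_count / min(elapsed, averaging_seconds)  # -> 6.0 signals/sec
# If the block had only been running for 2 seconds, the divisor would be
# 2.0 instead, so a partial window still reports a sensible rate.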
class Queue(Persistence, GroupBy, Block): """ Queue block. A NIO block for queueing up signals. As signals pile up, the Queue block releases a configurable number at a configurable interval. If incoming signals would overflow the queue, signals are popped off the front as needed. If a 'group_by' string is configured, incoming signals are divided and grouped by the value of that attribute. The configured capacity applies to *each* such queue, not the block as a whole. """ version = VersionProperty("1.0.1") interval = TimeDeltaProperty(title='Notification Interval', default={'seconds': 1}, allow_none=True) capacity = IntProperty(default=100, title='Capacity') chunk_size = IntProperty(default=1, title='Chunk Size') reload = BoolProperty(default=False, title='Auto-Reload?') uniqueness = Property(title='Queue Uniqueness Expression', allow_none=True, default="{{ None }}") update = BoolProperty(title='Update Non-Unique Signals', default=False) def persisted_values(self): return ["_queues"] def __init__(self): super().__init__() self._queues = defaultdict(list) self._queue_locks = defaultdict(Lock) self._meta_lock = Lock() self._emit_job = None def configure(self, context): super().configure(context) # Make sure perisisted queue capacity is less than current config for queue_name, queue_values in self._queues.items(): self._queues[queue_name] = queue_values[:self.capacity()] # build _groups for groupby mixin self._groups = set(self._queues.keys()) def start(self): super().start() self._start_emit_job() def stop(self): if self._emit_job is not None: self._emit_job.cancel() super().stop() def process_signals(self, signals): self.logger.debug("Processing {} signals".format(len(signals))) self.for_each_group(self._push_group, signals) def pop(self, grp): ''' Remove the top n signals from the specified queue. Args: grp (str): The queue from which to pop. count (int): The number of signals to pop off. reload (bool): If True, put popped signals back on queue. Returns: top_n (list): 'Count' signals from the front of the queue. ''' count = self.chunk_size() reload = self.reload() # lock the queue we're popping from self.logger.debug("pop: {} {} {}".format(grp, count, reload)) with self._get_lock(grp): # check out the front of the queue top_n = self._queues[grp][0:count] self.logger.debug( "Removing %d signals from %s_queue" % (len(top_n), grp)) self._queues[grp][:] = self._queues[grp][len(top_n):] # If reloading, put signal back on queue. if reload: self.logger.debug("Reloading {}_queue".format(grp)) self._queues[grp].extend(top_n) return top_n def push(self, signal, grp): ''' Add a signal to the back of the queue. Args: signal (Signal): The signal to add. grp (str): Group to add signal to. Returns: None ''' queue = self._queues[grp] # check for uniqueness if property is set try: unique_val = self.uniqueness(signal) self.logger.debug( "Testing uniqueness for signal: {}".format(unique_val)) except Exception as e: unique_val = None self.logger.warning( "Uniqueness expression failed. 
Using value of None.") if unique_val is not None: for idx, sig in enumerate(queue): try: sig_val = self.uniqueness(sig) except Exception as e: sig_val = None if sig_val == unique_val: self.logger.debug( "Signal {} already in {}_queue".format(sig_val, grp) ) if self.update(): queue[idx] = signal return # pop one off the top of that queue if it's at capacity if len(queue) == self.capacity(): self.logger.debug( "Pushing signal and capactity of {}_signal is full: {}".format( grp, self.capacity() ) ) queue.pop(0) self.logger.debug("Appending signal to {}_queue".format(grp)) queue.append(signal) def _push_group(self, signals, group): # lock the queue before appending with self._get_lock(group): for signal in signals: self.push(signal, group) def _get_lock(self, grp): ''' Returns the lock for a particular queue. Note that we're maintaining a synchronized dictionary of locks alongside our dict of queues. ''' with self._meta_lock: self._queue_locks[grp] = self._queue_locks.get(grp, Lock()) return self._queue_locks[grp] def _start_emit_job(self): ''' Start job that emits signals from the queue ''' if self.interval() and self.interval().total_seconds() > 0: # only schedule if the interval is a positive number self._emit_job = Job( self.emit, self.interval(), True ) def emit(self): ''' Notify the configured number of signals from the front of the queue. ''' signals_to_notify = self.for_each_group(self.pop) if signals_to_notify: self.logger.debug( "Notifying {} signals".format(len(signals_to_notify)) ) self.notify_signals(signals_to_notify) def _inspect_group(self, response, group): response_group = {'count': 0, 'signals': []} query = response.get('query', '{{ True }}') ignored_signals = [] for signal in self._queues.get(group, []): try: eval = Evaluator(query).evaluate(signal) except: eval = False if eval: response_group['signals'].append( json.loads(json.dumps( signal.to_dict(), indent=4, separators=(',', ': '), default=str)) ) response_group['count'] += 1 response['count'] += 1 else: ignored_signals.append(signal) response['groups'][group] = response_group return response, ignored_signals def view(self, query, group): ''' Command to view the signals that are in the queue. If no group parameter is specified, all queues are returned. ''' self.logger.debug("Command: view") response = {} response['query'] = query response['group'] = group response['count'] = 0 response['groups'] = {} if group and group in self._queues: # if group exists, return only the specified group self._view_group(group, response) elif not group: # if no group is specifed in params return all groups self.for_each_group(self._view_group, **{'response': response}) return response def _view_group(self, group, response): with self._get_lock(group): response, _ = self._inspect_group(response, group) def remove(self, query, group): ''' Remove signals from *group* where *query* is True. Signals are not notified. 
''' self.logger.debug("Command: remove") response = {} response['query'] = query response['group'] = group response['count'] = 0 response['groups'] = {} if group and group in self._queues: # if group exists, remove from only only the specified group self._remove_from_group(group, response, query) elif not group: # if no group is specifed in params return all groups self.for_each_group(self._remove_from_group, **{'response': response, 'query': query}) return response def _remove_from_group(self, group, response, query): with self._get_lock(group): response, signals = self._inspect_group(response, group) # signals that don't match the query stay in the queue, but if # there are no signals remaining, delete the entire queue. if len(signals) > 0: self._queues[group] = signals else: # _queues is a dict with keys that make up the set _groups. # These must be kept in sync when removing keys in order to # maintain the true state of the block. If these objects are # not synced, a "view" or "remove" command for all groups will # show that groups which have previously been expired are still # present, due to the for_each_group() call, which uses the # _groups set to iterate over the groups. self.logger.debug("Deleting empty queue {}.".format(group)) self._queues.pop(group, None) self._groups.remove(group) def update_props(self, props): ''' Updates the *interval* property. The next scheduled emit job with be canceled and a new repeatable emit job is started. ''' self.logger.debug("Command: update_props") response = {} if props is None or not isinstance(props, dict): response['message'] = \ "'props' needs to be a dictionary: {}".format(props) return response # Update *interval*. interval = props.get('interval') if interval and isinstance(interval, dict) and \ (interval.get('days') or interval.get('seconds') or interval.get('microseconds')): days = interval.get('days', 0) seconds = interval.get('seconds', 0) microseconds = interval.get('microseconds', 0) interval = timedelta(days, seconds, microseconds) response['interval'] = interval response['prev_interval'] = self.interval # cancel emit job and restart with new interval if self._emit_job is not None: self._emit_job.cancel() self._start_emit_job() self.interval = interval self.logger.info( 'Interval has been updated to {}'.format(interval)) elif interval: response['message'] = \ "'interval' needs to be a timedelta dict: {}".format(interval) return response
class ARTSS(Block): """ A block for communicating with a artss gateway server. Properties: host (str): location of the socket.io server. port (int): socket.io server port. version (enum): Which version of socketIO to use max_retry (int): Specifies maximum time to wait before a retry """ version = VersionProperty("1.0.1") host = StringProperty(title="Host", default="") port = IntProperty(title="Port", default=10001) history_count = IntProperty(title="Event History Count", default=2) connection_timeout = IntProperty(title="Connection Timeout", default=5) max_retry = TimeDeltaProperty(title="Max Connection Retry Time", default={"seconds": 300}) def __init__(self): super().__init__() self._client = None self._timeout = 1 self._connected = False self._reconnection_job = None self._stopping = False self._polling = False def configure(self, context): super().configure(context) # override loggers with our own from .protocol.artss import set_logger as set_protocol_logger set_protocol_logger(self._logger) from .gateway import set_logger as set_gateway_logger set_gateway_logger(self._logger) self._client = ARTSSGateway(self.history_count) self._connect_to_gateway() def stop(self): """ Stop the block by disconnecting the client. """ self._stopping = True # Cancel any pending reconnects if any self._cancel_reconnection_job() self._disconnect_client() super().stop() def handle_reconnect(self): self._disconnect_client() # Don't need to reconnect if we are stopping if self._stopping: return if self._reconnection_job is not None: self._logger.debug("Reconnection job already scheduled") return self._logger.warning("Attempting to reconnect in {0} seconds.".format(self._timeout)) self._reconnection_job = Job(self._connect_to_gateway, timedelta(seconds=self._timeout), repeatable=False) def _cancel_reconnection_job(self): if self._reconnection_job: self._reconnection_job.cancel() self._reconnection_job = None def _connect_to_gateway(self): try: # clean up any connection job if any self._cancel_reconnection_job() self._logger.info("Connecting to {0}:{1}".format(self.host, self.port)) self._client.connect( self.host, self.port, timeout=self.connection_timeout if self.connection_timeout else None ) self._connected = True self._logger.info("Connected successfully") # Reset the timeout self._timeout = 1 except: self._timeout *= 2 # Make sure our timeout is not getting out of hand self._timeout = min(self._timeout, self.max_retry.total_seconds()) self._logger.exception("Error connecting") self.handle_reconnect() def process_signals(self, signals): """ Used to poll gateway. """ # Don't do any processing or polling if the block is stopping. # The connection may be closed and we don't want to re-open if self._stopping: return if self._connected and not self._polling: try: self._polling = True events = self._client.poll() if events: signals = [] for event in events: signals.append(Signal(event.to_dict())) self._logger.debug("Notifying: {0} signals".format(len(signals))) self.notify_signals(signals) except Exception as e: self._logger.exception("Polling ARTSS Gateway") # connection problems are handled through reconnect if isinstance(e, ARTSSGatewayConnectionClosed): self.handle_reconnect() finally: self._polling = False def _disconnect_client(self): """ Safely close the client and remove the reference """ try: # Try to close the client if it's open self._connected = False self._client.disconnect() except: # If we couldn't close, it's fine. Either the client wasn't # opened or it didn't want to respond. 
That's what we get for # being nice and cleaning up our connection self._logger.info("Error closing gateway connection", exc_info=True)
class Buffer(Persistence, GroupBy, Block):

    version = VersionProperty("0.1.1")
    signal_start = BoolProperty(title='Start Interval On Signal?',
                                default=False)
    interval = TimeDeltaProperty(title='Buffer Interval',
                                 default={'seconds': 1},
                                 allow_none=True)
    interval_duration = TimeDeltaProperty(title='Interval Duration',
                                          allow_none=True)

    def __init__(self):
        super().__init__()
        self._last_emission = None
        self._cache = defaultdict(lambda: defaultdict(list))
        self._cache_lock = Lock()
        self._emission_job = None
        self._active_job = False

    def persisted_values(self):
        return ['_last_emission', '_cache']

    def start(self):
        # Start emission job on service start if bool property is not checked
        if self.interval() and not self.signal_start():
            now = datetime.utcnow()
            latest = self._last_emission or now
            delta = self.interval() - (now - latest)
            self._emission_job = Job(
                self._emit_job,
                delta,
                False,
                group=None,
                reset=True,
            )

    def emit(self, group=None):
        self._emit_job(group)

    def _emit_job(self, group, reset=False):
        self.logger.debug('Emitting signals')
        if reset:
            self._emission_job.cancel()
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                True,
                group=group,
            )
        self._last_emission = datetime.utcnow()
        signals = self._get_emit_signals(group)
        self._active_job = False
        if signals:
            self.logger.debug('Notifying {} signals'.format(len(signals)))
            self.notify_signals(signals)
        else:
            self.logger.debug('No signals to notify')

    def _get_emit_signals(self, group=None):
        signals = []
        with self._cache_lock:
            if not group:
                for group in self._cache.keys():
                    signals.extend(self._get_emit_signals_for_group(group))
            else:
                signals.extend(self._get_emit_signals_for_group(group))
        return signals

    def _get_emit_signals_for_group(self, group):
        now = int(time())
        signals = []
        cache_times = sorted(self._cache[group].keys())
        if self.interval_duration():
            # Remove old signals from cache.
            old = now - int(self.interval_duration().total_seconds())
            self.logger.debug(
                'Removing signals from cache older than {}'.format(old))
            for cache_time in cache_times:
                if cache_time < old:
                    del self._cache[group][cache_time]
                else:
                    break
        for cache in cache_times:
            signals.extend(self._cache[group][cache])
        if not self.interval_duration():
            # Clear cache every time if duration is not set.
            self.logger.debug('Clearing cache of signals')
            self._cache[group] = defaultdict(list)
        return signals

    def process_signals(self, signals):
        self.for_each_group(self.process_group, signals)
        # Start a new job if property is checked and there is no active job
        if self.signal_start() and not self._active_job:
            self._emission_job = Job(
                self._emit_job,
                self.interval(),
                False,
                group=None,
                reset=False,
            )
            self._active_job = True  # Added flag for active job

    def process_group(self, signals, key):
        with self._cache_lock:
            now = int(time())
            self._cache[key][now].extend(signals)
def start(self):
    super().start()
    self._job = Job(self._emit, self.interval(), True)
    # Run an emit cycle immediately, but in a new thread since it
    # might take some time and we don't want it to hold up start
    spawn(self._emit)
class CronTrigger():

    """ Notify signals according to cron-like timetable """

    cron = ObjectProperty(CronConf, title='Cron Schedule', default=CronConf())

    def __init__(self):
        super().__init__()
        self._job = None
        self._cron_specs = None

    def configure(self, context):
        super().configure(context)
        # TODO: check that the config is valid cron syntax
        self._cron_specs = [self.cron.minute,
                            self.cron.hour,
                            self.cron.day_of_month,
                            self.cron.month,
                            self.cron.day_of_week]

    def start(self):
        super().start()
        # Like crontab, check to run jobs every minute
        self._job = Job(self._cron, timedelta(minutes=1), True)
        # Run a cron cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._cron)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        super().stop()

    def _cron(self):
        """ Called every minute to check if cron job should notify signals """
        self._logger.debug("Checking if cron emit should run")
        now = datetime.utcnow()
        now = [str(now.minute),
               str(now.hour),
               str(now.day),
               str(now.month),
               str(now.weekday())]
        if self._check_cron(now):
            spawn(self._emit)

    def _check_cron(self, now):
        """ Return True if cron property matches with `now`

        `now` is a list containing the 5 cron fields
        """
        for i in range(5):
            # '*' should match no matter what
            if self._cron_specs[i] == '*':
                now[i] = '*'
        # TODO: handle more interesting cron settings than just numbers and '*'
        return now == self._cron_specs

    def _emit(self):
        self._logger.debug("Generating signals")
        signals = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(signals, list):
            signals = list(signals)
        if signals:
            self._logger.debug("Notifying {} signals".format(len(signals)))
            self.notify_signals(signals)
        else:
            self._logger.debug("No signals generated")
class TwitterSearch(Block):

    version = VersionProperty("1.0.0")
    interval = TimeDeltaProperty(title="Query Interval",
                                 default={"minutes": 10})
    tweet_text = ListProperty(StringType, title="Text includes", default=[])
    hashtags = ListProperty(StringType, title="Hashtags", default=[])
    _from = StringProperty(title="From user", default='')
    _to = StringProperty(title="To user", default='')
    at = ListProperty(StringType, title="Referenced users", default=[])
    geo = ObjectProperty(GeoCode, title="Geographical")
    count = IntProperty(title="Max Results", default=25)
    lookback = IntProperty(title="Query Lookback (days)", default=-1)
    creds = ObjectProperty(TwitterCreds, title="Credentials")
    tude = SelectProperty(
        TwitterAttitude,
        default=TwitterAttitude.NEUTRAL,
        title="Tone"
    )
    operator = SelectProperty(
        TwitterQueryOp,
        default=TwitterQueryOp.AND,
        title="Query Operator"
    )
    result_type = SelectProperty(
        TwitterResultType,
        default=TwitterResultType.MIXED,
        title="Result Type"
    )

    def __init__(self):
        super().__init__()
        self._auth = None
        self._url = None
        self._search_job = None

    def configure(self, context):
        super().configure(context)

    def start(self):
        super().start()
        self._authorize()
        self._construct_url()
        self._search_job = Job(
            self._search_tweets,
            self.interval(),
            False,
            self._url
        )

    def stop(self):
        super().stop()
        self._search_job.cancel()

    def _search_tweets(self, url):
        rsp = requests.get(url, auth=self._auth)
        status = rsp.status_code
        if status == 200:
            data = rsp.json()
            tweets = data['statuses']
            next_results = data['search_metadata'].get('next_results')
            self.notify_signals([Signal(t) for t in tweets])
            if next_results is not None:
                self._search_tweets(
                    "{0}{1}".format(SEARCH_URL, next_results)
                )
            else:
                self.logger.debug("Scheduling next search...")
                self._search_job = Job(
                    self._search_tweets,
                    self.interval(),
                    False,
                    self._url
                )
        else:
            self.logger.error(
                "Twitter search failed with status {0}".format(status))

    def _construct_url(self):
        self._url = "{0}?".format(SEARCH_URL)
        query = self._process_query()
        if query:
            self._append_param('q', sep=self.operator().value, vals=query)
        if self.geo().latitude():
            self._append_param('geo', ',', 'mi',
                               [self.geo().latitude(),
                                self.geo().longitude(),
                                self.geo().radius()])
        if self.lookback() >= 0:
            now = datetime.utcnow() - timedelta(days=self.lookback())
            vals = [now.year, now.month, now.day]
            self._append_param('since', '-', vals=vals)
        if self.count():
            self._append_param('count', vals=[self.count()])
        self._append_param('result_type', vals=[self.result_type().value])

    def _append_param(self, p_name, sep='', end='', vals=[]):
        val_str = quote(sep.join([str(v) for v in vals]) + end)
        self._url += "{0}={1}&".format(p_name, val_str)

    def _process_query(self):
        values = []
        values.extend(self.tweet_text())
        for h in self.hashtags():
            values.append("#{0}".format(h))
        for u in self.at():
            values.append("@{0}".format(u))
        if self._from():
            values.append("from:{0}".format(self._from()))
        if self._to():
            values.append("to:{0}".format(self._to()))
        if self.tude().value:
            values.append(self.tude().value)
        return values

    def _authorize(self):
        """ Prepare the OAuth handshake and verify. """
        try:
            self._auth = OAuth1(self.creds().consumer_key(),
                                self.creds().app_secret(),
                                self.creds().oauth_token(),
                                self.creds().oauth_token_secret())
            resp = requests.get(VERIFY_CREDS_URL, auth=self._auth)
            if resp.status_code != 200:
                raise Exception("Status %s" % resp.status_code)
        except Exception:
            self.logger.error(
                "Authentication failed for consumer key: %s" %
                self.creds().consumer_key())
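# Standalone illustration (not part of the block above) of the URL building
# done by _construct_url and _append_param: each parameter's values are joined
# with a separator, percent-encoded with urllib.parse.quote, and appended as
# "name=value&". The base URL is the conventional Twitter v1.1 search endpoint
# and the query values are hypothetical, used only for this sketch.
from urllib.parse import quote

SEARCH_URL = "https://api.twitter.com/1.1/search/tweets.json"


def append_param(url, p_name, sep='', end='', vals=()):
    val_str = quote(sep.join(str(v) for v in vals) + end)
    return url + "{0}={1}&".format(p_name, val_str)


url = "{0}?".format(SEARCH_URL)
url = append_param(url, 'q', sep=' AND ', vals=['nio', '#python'])
url = append_param(url, 'count', vals=[25])
print(url)
# https://api.twitter.com/1.1/search/tweets.json?q=nio%20AND%20%23python&count=25&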
class CronTrigger():

    """ Notify signals according to a cron-like timetable """

    cron = ObjectProperty(CronConf, title='Cron Schedule', default=CronConf())
    utc = BoolProperty(title='UTC', default=False)

    def __init__(self):
        super().__init__()
        self._job = None
        self._cron_specs = None

    def configure(self, context):
        super().configure(context)
        # TODO: check that the config is valid cron syntax
        self._cron_specs = [
            self.cron().minute(),
            self.cron().hour(),
            self.cron().day_of_month(),
            self.cron().month(),
            self.cron().day_of_week()
        ]

    def start(self):
        super().start()
        # Like crontab, check to run jobs every minute
        self._job = Job(self._cron, timedelta(minutes=1), True)
        # Run a cron cycle immediately, but in a new thread since it
        # might take some time and we don't want it to hold up start
        spawn(self._cron)

    def stop(self):
        """ Stop the simulator thread and signal generation """
        if self._job:
            self._job.cancel()
        super().stop()

    def _cron(self):
        """ Called every minute to check if cron job should notify signals """
        self.logger.debug("Checking if cron emit should run")
        if self.utc():
            now = datetime.utcnow()
        else:
            now = datetime.now()
        now = [
            str(now.minute),
            str(now.hour),
            str(now.day),
            str(now.month),
            str(now.weekday())
        ]
        if self._check_cron(now):
            spawn(self._emit)

    def _check_cron(self, now):
        """ Return True if the cron property matches with `now`

        `now` is a list containing the 5 cron fields
        """
        for i in range(5):
            # '*' should match no matter what
            if self._cron_specs[i] == '*':
                now[i] = '*'
        # TODO: handle more interesting cron settings than just numbers and '*'
        return now == self._cron_specs

    def _emit(self):
        self.logger.debug("Generating signals")
        signals = self.generate_signals()
        # If a generator is returned, build the list
        if not isinstance(signals, list):
            signals = list(signals)
        if signals:
            self.logger.debug("Notifying {} signals".format(len(signals)))
            self.notify_signals(signals)
        else:
            self.logger.debug("No signals generated")
class HeartbeatHandler(object):

    """ A class that can send and handle socket.io heartbeats """

    def __init__(self, send_callback, timeout_callback, heartbeat_interval,
                 heartbeat_timeout, logger):
        """ Create a heartbeat handler with some timing parameters

        Args:
            send_callback (func): A function to call when sending heartbeats
            timeout_callback (func): A function to call when a heartbeat
                response is not received in time
            heartbeat_interval (int): How often (secs) to send heartbeats
            heartbeat_timeout (int): How long (secs) to wait for a heartbeat
                response from the server
            logger (Logger): Where to log information and diagnostics
        """
        super().__init__()
        self._heartbeat_func = send_callback
        self._timeout_func = timeout_callback
        self._heartbeat_job = None
        self._heartbeat_expiry_job = None
        self._heartbeat_interval = heartbeat_interval
        self._heartbeat_timeout = heartbeat_timeout
        self.logger = logger

    def handle_heartbeat_response(self):
        """ Handle a response heartbeat from the server """
        self.logger.debug("Heartbeat PONG received")
        # Restart the heartbeat expiry job
        self._start_expiry_job()

    def start_heartbeats(self):
        """ Start a job which will periodically send heartbeats to the server.

        This method will also start a job that will wait for responses in
        case the server doesn't respond in time.
        """
        # Since we are starting a new heartbeat cycle, cancel anything
        # that was outstanding
        self.stop_heartbeats()
        # Start a job that will send heartbeats indefinitely
        self._heartbeat_job = Job(
            self._heartbeat_func,
            timedelta(seconds=self._heartbeat_interval),
            repeatable=True)
        # Also start a job that will wait for heartbeat timeouts
        self._start_expiry_job()

    def stop_heartbeats(self):
        self._stop_expiry_job()
        self._stop_heartbeat_job()

    def _start_expiry_job(self):
        # Stop the existing job, if it exists
        self._stop_expiry_job()
        self._heartbeat_expiry_job = Job(
            self._no_heartbeat_response,
            timedelta(seconds=self._heartbeat_timeout),
            repeatable=False)

    def _stop_heartbeat_job(self):
        """ Cancel and remove the job that sends heartbeats """
        if self._heartbeat_job:
            self._heartbeat_job.cancel()
        self._heartbeat_job = None

    def _stop_expiry_job(self):
        """ Cancel and remove the job that waits for responses """
        if self._heartbeat_expiry_job:
            self._heartbeat_expiry_job.cancel()
        self._heartbeat_expiry_job = None

    def _no_heartbeat_response(self):
        """ Called when a heartbeat request has expired.

        All we are going to do in here is tell the client we timed out. We
        don't want to stop sending heartbeats, maybe the next one will go
        through and the server will respond which will kick start the expiry
        process again.
        """
        self.logger.warning(
            "No heartbeat response was received...reconnecting")
        self._timeout_func()
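# Hedged usage sketch for the HeartbeatHandler above: wire it up with plain
# functions for the send and timeout callbacks. It assumes the framework's
# Job class is available wherever HeartbeatHandler is defined; the callbacks,
# intervals, and logger name below are made up for illustration.
import logging

logging.basicConfig(level=logging.DEBUG)


def send_ping():
    print("PING ->")          # e.g. write a heartbeat packet to the socket


def on_timeout():
    print("no PONG received, reconnecting")


handler = HeartbeatHandler(
    send_callback=send_ping,
    timeout_callback=on_timeout,
    heartbeat_interval=25,    # send a heartbeat every 25 seconds
    heartbeat_timeout=60,     # give the server 60 seconds to answer
    logger=logging.getLogger("heartbeats"))

handler.start_heartbeats()
# ... when a PONG arrives on the socket:
handler.handle_heartbeat_response()
# ... and on shutdown:
handler.stop_heartbeats()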
class Persistence(object):

    """ A block mixin that provides persistence for some variables.

    To use, override the persisted_values function and define which
    variables on your class you wish to have persisted. The values should
    be strings that correspond to the variable names to be saved.
    """

    backup_interval = TimeDeltaProperty(
        visible=False, title='Backup Interval', default={"seconds": 60 * 60})
    use_persistence = BoolProperty(
        title='Load from Persistence?', default=True)

    def __init__(self):
        super().__init__()
        self._backup_job = None

    def persisted_values(self):
        """ Return a dictionary containing the values to be persisted.

        This function should be overridden in a Block that wishes to use
        persistence. Return a dictionary with the key being the key you
        wish to save it under (useful for making changes to the block) and
        the value being the name of the instance attribute to save and
        load into.

        For example, if your block class has an instance level attribute
        called `_values` and you wish to save it, you could return this
        dictionary:

            {
                "values": "_values"
            }
        """
        return {}

    def _load(self):
        """ Load the values from persistence """
        self._logger.debug("Loading from persistence")
        for persist_key, persist_target in self.persisted_values().items():
            if self.persistence.has_key(persist_key):
                loaded = self.persistence.load(persist_key)
                self._logger.debug("Loaded value {} for attribute {}".format(
                    loaded, persist_target))
                # Set the loaded value to the attribute on this class
                setattr(self, persist_target, loaded)

    def _save(self):
        """ Save the values to persistence """
        self._logger.debug("Saving to persistence")
        for persist_key, persist_target in self.persisted_values().items():
            self.persistence.store(persist_key, getattr(self, persist_target))
        self.persistence.save()

    def configure(self, context):
        super().configure(context)
        if self.use_persistence:
            self._load()

    def start(self):
        super().start()
        # If we have configured a backup interval, set up the job to
        # periodically save to persistence
        if self.backup_interval.total_seconds() > 0:
            self._backup_job = Job(self._save, self.backup_interval, True)

    def stop(self):
        if self._backup_job:
            self._backup_job.cancel()
        # Do one last save before stopping
        self._save()
        super().stop()
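# Hedged example of a block using the Persistence mixin above. It overrides
# persisted_values() exactly as the docstring describes, mapping a persistence
# key to the instance attribute to save and restore. The Block base class and
# the CounterBlock name are assumptions made only for this illustration.
class CounterBlock(Persistence, Block):

    def __init__(self):
        super().__init__()
        self._values = []

    def persisted_values(self):
        # Save self._values under the persistence key "values"
        return {"values": "_values"}

    def process_signals(self, signals):
        self._values.append(len(signals))
        self.notify_signals(signals)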
def start(self):
    super().start()
    self._job = Job(self._emit, self.interval, True)
    # Run an emit cycle immediately, but in a new thread since it
    # might take some time and we don't want it to hold up start
    spawn(self._emit)