class ForwarderTargetService(MultiServiceMixin, MultiService):
    """
    Container service for target services.

    As of June 2016, there are two target services
    for emitting data: MQTT and InfluxDB.
    """

    def __init__(self, address=None, **kwargs):
        MultiServiceMixin.__init__(self, **kwargs)

        self.address = address
        self.scheme = self.address.uri.scheme
        self.downstream = None

    def setupService(self):
        """
        Configure the effective data target by registering an appropriate
        downstream service object for handling the target address scheme.
        """
        log.info(u'Starting {name} for serving address {address}', name=self.logname, address=self.address)
        self.settings = self.parent.settings

        if self.scheme == 'mqtt':
            # Register MqttAdapter service as downstream subsystem service object
            self.downstream = MqttAdapter(
                name=self.name + '-downstream',
                broker_host=self.settings.mqtt.host,
                broker_port=int(self.settings.mqtt.port),
                broker_username=self.settings.mqtt.username,
                broker_password=self.settings.mqtt.password)

        elif self.scheme == 'influxdb':
            # InfluxDB has no subsystem service, it's just an adapter
            pass

        else:
            raise KeyError('No target/downstream dispatcher for scheme {scheme}'.format(scheme=self.scheme))

        # Register service component with its container
        if self.downstream:
            self.registerService(self.downstream)
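    # A minimal sketch of the scheme dispatch above, with hypothetical
    # target addresses for illustration (the real address objects come
    # from the surrounding forwarder application machinery):
    #
    #   target address 'mqtt://localhost/mqttkit-1/testdrive'
    #       => scheme 'mqtt': a downstream MqttAdapter is registered as child service
    #   target address 'influxdb:///mqttkit_1_testdrive'
    #       => scheme 'influxdb': no child service, emit() queries through DataFrameQuery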
    def emit(self, uri, bucket):
        """
        Adapt, serialize and emit data bucket to target service.
        """

        log.debug('Emitting to target scheme {scheme}', scheme=self.scheme)

        if self.scheme == 'mqtt':
            # Publish JSON payload to MQTT bus
            topic = uri
            payload = bucket.json
            # TODO: Use threads.deferToThread here?
            return self.downstream.publish(topic, payload)

        elif self.scheme == 'influxdb':

            # InfluxDB query wrapper using expression derived from transformation data
            dfq = DataFrameQuery(settings=self.settings, bucket=bucket)

            # Perform query and obtain results as pandas DataFrame
            df = dfq.query()

            # Announce routing information via HTTP response headers
            bucket.request.setHeader('Target-Database', bucket.tdata.database)
            bucket.request.setHeader('Target-Expression', bucket.tdata.expression)
            bucket.request.setHeader('Target-Address-Scheme', self.scheme)
            bucket.request.setHeader('Target-Address-Uri', uri)

            # Database result is empty, send appropriate response
            if df is None or df.empty:
                return self.response_no_results(bucket)

            # DataFrame manipulation

            # Drop some fields from DataFrame as requested
            if 'exclude' in bucket.tdata and bucket.tdata.exclude:
                drop_fields = read_list(bucket.tdata.exclude, empty_elements=False)
                try:
                    df.drop(drop_fields, axis=1, inplace=True)
                except ValueError as ex:
                    log.error(last_error_and_traceback())
                    error_message = u'Error: {type} {message}'.format(type=type(ex), message=ex)
                    return bucket.request.error_response(bucket, error_message=error_message)

            # Use only specified fields from DataFrame as requested
            if 'include' in bucket.tdata and bucket.tdata.include:
                use_fields = read_list(bucket.tdata.include, empty_elements=False)
                use_fields.insert(0, 'time')
                try:
                    df = df.filter(use_fields, axis=1)
                except ValueError as ex:
                    log.error(last_error_and_traceback())
                    error_message = u'Error: {type} {message}'.format(type=type(ex), message=ex)
                    return bucket.request.error_response(bucket, error_message=error_message)

            # Propagate non-null values forward or backward.
            # With time series data, using pad/ffill is extremely common so that
            # the "last known value" is available at every time point.
            # http://pandas.pydata.org/pandas-docs/stable/missing_data.html#filling-missing-values-fillna
            if 'pad' in bucket.tdata and asbool(bucket.tdata.pad):
                df.fillna(method='pad', inplace=True)

            if 'backfill' in bucket.tdata and asbool(bucket.tdata.backfill):
                df.fillna(method='backfill', inplace=True)

            if 'interpolate' in bucket.tdata and asbool(bucket.tdata.interpolate):
                # Perform linear interpolation at missing datapoints,
                # otherwise matplotlib would not plot the sparse data frame.
                # http://pandas.pydata.org/pandas-docs/stable/missing_data.html#interpolation
                df.interpolate(inplace=True)

            if 'sorted' in bucket.tdata and asbool(bucket.tdata.sorted):
                # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sort.html
                df.sort(axis='columns', inplace=True)

            # Compute HTTP response from DataFrame, taking designated output format into account
            response = HttpDataFrameResponse(bucket, dataframe=df)

            # Synchronous: the worker-threading is already on the HTTP layer
            return response.render()

            # Asynchronous: Perform computation in a separate thread (currently unused)
            #d = threads.deferToThread(response.render)
            #d.addErrback(handleFailure, bucket.request)
            #d.addBoth(bucket.request.write)
            #d.addBoth(lambda _: bucket.request.finish())
            #return server.NOT_DONE_YET

        else:
            message = 'No target/downstream dispatcher for scheme {scheme}'.format(scheme=self.scheme)
            log.error(message)
            raise KeyError(message)

    def response_no_results(self, bucket):
        """
        Send "404 Not Found" response body in text/plain format,
        describing the HTTP API query interface.
        """

        # FIXME: Some words about "now-10d" being the default for "from", see influx.py.
        # FIXME: Maybe refactor "now-10d" to transformation machinery to make it accessible from here.
        error_message = u'# 404 Not Found\n#\n' \
                        u'# No data for query expression "{expression}"\n' \
                        u'# Please recognize absolute datetimes are expected to be in ISO 8601 format. ' \
                        u'Default is UTC, optionally specify an appropriate timezone offset.\n'.format(expression=bucket.tdata.expression)
        error_message += u'#\n# Examples:\n#\n' \
                         u'# ?from=2016-06-25T22:00:00.000Z\n' \
                         u'# ?from=2016-06-26T00:00:00.000%2B02:00 (%2B is "+" urlencoded)\n' \
                         u'# ?from=now-4h&to=now-2h\n' \
                         u'# ?from=now-8d5h3m&to=now-6d'
        bucket.request.setResponseCode(http.NOT_FOUND)
        bucket.request.setHeader('Content-Type', 'text/plain; charset=utf-8')
        return error_message.encode('utf-8')
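# A minimal demonstration sketch of the gap-filling options used in emit()
# above, assuming a pandas version of this era where fillna(method=...) is
# available. Not part of the service; the column name and values are made up.
def _demo_gapfilling():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'temperature': [20.0, np.nan, np.nan, 23.0]})

    # Forward-fill: carry the last known value forward => 20.0, 20.0, 20.0, 23.0
    print(df.fillna(method='pad'))

    # Backward-fill: pull the next known value backward => 20.0, 23.0, 23.0, 23.0
    print(df.fillna(method='backfill'))

    # Linear interpolation between known values => 20.0, 21.0, 22.0, 23.0
    print(df.interpolate())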
class MqttInfluxGrafanaService(MultiService, MultiServiceMixin):

    def __init__(self, channel=None, graphing=None, strategy=None):
        MultiService.__init__(self)

        # TODO: Sanity checks/assertions against channel, graphing and strategy
        # TODO: Make subsystems dynamic
        self.subsystems = ['channel', 'graphing', 'strategy']

        self.channel = channel or Bunch(realm=None, subscriptions=[])
        self.graphing = to_list(graphing)
        self.strategy = strategy

        self.name = u'service-mig-' + self.channel.get('realm', unicode(id(self)))

    def setupService(self):
        self.log(log.info, u'Bootstrapping')
        self.settings = self.parent.settings

        # Optionally register subsystem components as child services
        for subsystem in self.subsystems:
            if hasattr(self, subsystem):
                subsystem_service = getattr(self, subsystem)
                if isinstance(subsystem_service, Service):
                    log.info('Registering subsystem component "{subsystem}" as service', subsystem=subsystem)
                    self.registerService(subsystem_service)

        # Configure metrics to be collected every X seconds
        metrics_interval = int(self.channel.get('metrics_logger_interval', 60))
        self.metrics = Bunch(tx_count=0, starttime=time.time(), interval=metrics_interval)

        subscriptions = read_list(self.channel.mqtt_topics)
        self.mqtt_service = MqttAdapter(
            name=u'mqtt-' + self.channel.realm,
            broker_host=self.settings.mqtt.host,
            broker_port=int(self.settings.mqtt.port),
            broker_username=self.settings.mqtt.username,
            broker_password=self.settings.mqtt.password,
            callback=self.mqtt_receive,
            subscriptions=subscriptions)

        self.registerService(self.mqtt_service)

        self.influx = InfluxDBAdapter(settings=self.settings.influxdb)

        # Perform MQTT message processing using a different thread pool
        self.threadpool = ThreadPool()
        self.thimble = Thimble(reactor, self.threadpool, self, ["process_message"])

    def startService(self):
        self.setupService()
        #self.log(log.info, u'Starting')
        MultiService.startService(self)
        self.metrics_twingo = LoopingCall(self.process_metrics)
        self.metrics_twingo.start(self.metrics.interval, now=True)

    def log(self, level, prefix):
        level(u'{prefix} {class_name}. name={name}, channel={channel}',
              prefix=prefix, class_name=self.__class__.__name__, name=self.name, channel=dict(self.channel))

    def topic_to_topology(self, topic):
        return self.strategy.topic_to_topology(topic)

    def topology_to_storage(self, topology):
        return self.strategy.topology_to_storage(topology)

    def get_basetopic(self, topic):
        topic = TopicMatchers.data.sub('', topic)
        topic = TopicMatchers.event.sub('', topic)
        return topic

    def classify_topic(self, topic):
        if TopicMatchers.data.search(topic):
            return MessageType.DATA_CONTAINER
        if TopicMatchers.discrete.search(topic):
            return MessageType.DATA_DISCRETE
        if TopicMatchers.event.search(topic):
            return MessageType.EVENT
        if TopicMatchers.error.search(topic):
            return MessageType.ERROR

    def mqtt_receive(self, topic=None, payload=None, **kwargs):
        try:
            # Synchronous message processing
            #return self.process_message(topic, payload, **kwargs)

            # Asynchronous message processing
            #deferred = threads.deferToThread(self.process_message, topic, payload, **kwargs)

            # Asynchronous message processing using a different thread pool
            deferred = self.thimble.process_message(topic, payload, **kwargs)
            deferred.addErrback(self.mqtt_process_error, topic, payload)
            deferred.addErrback(self.mqtt_exception, topic, payload)
            return deferred

        except Exception:
            log.failure(u'Processing MQTT message failed. topic={topic}, payload={payload}', topic=topic, payload=payload)
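    # A hedged sketch of how topics route through classify_topic() above,
    # assuming TopicMatchers implements the suffix conventions documented in
    # process_message() below. The topic strings are hypothetical examples.
    #
    #   classify_topic('mqttkit-1/network/gateway/node/data.json')        => MessageType.DATA_CONTAINER
    #   classify_topic('mqttkit-1/network/gateway/node/data/temperature') => MessageType.DATA_DISCRETE
    #   classify_topic('mqttkit-1/network/gateway/node/event.json')       => MessageType.EVENT
    #   classify_topic('mqttkit-1/network/gateway/node/error.json')       => MessageType.ERROR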
    def process_message(self, topic, payload, **kwargs):

        payload = payload.decode('utf-8')

        # Ignore MQTT error signalling messages
        if topic.endswith('error.json'):
            return

        if self.channel.realm and not topic.startswith(self.channel.realm):
            #log.info('Ignoring message to topic {topic}, realm={realm}', topic=topic, realm=self.channel.realm)
            return False

        log.debug(u'Processing message on topic "{topic}" with payload "{payload}"', topic=topic, payload=payload)

        # Compute storage address from topic
        topology = self.topic_to_topology(topic)
        log.debug(u'Topology address: {topology}', topology=dict(topology))

        message_type = self.classify_topic(topic)

        message = None

        # a) En bloc: Multiple measurements in a JSON object
        #
        # The suffixes are:
        #
        # - data.json:     Regular
        # - data/__json__: Homie
        # - loop:          WeeWX (TODO: Move to specific vendor configuration.)
        # - message-json:  Deprecated
        #
        if message_type == MessageType.DATA_CONTAINER:
            # Decode message from JSON format
            # Required for weeWX data
            #message = convert_floats(json.loads(payload))
            message = json.loads(payload)

        # b) Discrete values
        #
        # The suffixes are:
        #
        # - data/temperature
        # - data/humidity
        # - ...
        #
        elif message_type == MessageType.DATA_DISCRETE:

            # TODO: Backward compat for single readings - remove!
            if 'slot' in topology and topology.slot.startswith('measure/'):
                topology.slot = topology.slot.replace('measure/', 'data/')

            # Single measurement as plain value; assume float.
            # Convert to MessageType.DATA_CONTAINER.
            if 'slot' in topology and topology.slot.startswith('data/'):

                # This is sensor data
                message_type = MessageType.DATA_CONTAINER

                # Amend topic and compute storage message from single scalar value
                name = topology.slot.replace('data/', '')
                value = float(payload)
                message = {name: value}

        # Set an event
        elif message_type == MessageType.EVENT:

            # This is an event
            message_type = MessageType.EVENT

            # Decode message from JSON format
            message = json.loads(payload)

        # Catch an error message
        elif message_type == MessageType.ERROR:
            log.debug(u'Ignoring error message from MQTT, "{topic}" with payload "{payload}"', topic=topic, payload=payload)
            return

        else:
            log.warn(u'Unknown message type on topic "{topic}" with payload "{payload}"', topic=topic, payload=payload)
            return

        # Count transaction
        self.metrics.tx_count += 1

        # TODO: Re-enable for measuring packet ingress frequency.
        # Currently turned off since sending timestamps from data acquisition.
        """
        if 'time' in message:
            self.metrics.packet_time = message['time']
        else:
            self.metrics.packet_time = None
        """

        # TODO: Data enrichment machinery, e.g. for geospatial data
        #       latitude/lat, longitude/long/lon/lng
        #
        #       1. geohash => lat/lon
        #       2. postcode/city => lat/lon (forward geocoder)
        #       3. lat/lon => geohash, if no geohash
        #       4. lat/lon => reverse geocoder (address information)
        #
        #       The outcome can provide additional meta information to be used by the tagging machinery below,
        #       e.g. create tags from homogenized Nominatim address modulo "house_number" etc.

        # TODO: Already do the tagging enrichment machinery here(!) to
        #       establish an additional metadata schema for further processing, e.g. Grafana.
        #       So, move the schwumms from the storage handler here!
        #
        #       Sane order for Grafana template variables:
        #       continent, country_code (upper), q-region, city, q-hood, road, (compound)

        # Compute storage location
        storage_location = self.topology_to_storage(topology)
        log.debug(u'Storage location: {storage}', storage=dict(storage_location))

        # Store data or event
        if message_type in (MessageType.DATA_CONTAINER, MessageType.EVENT):
            self.store_message(storage_location, message)

        # Provision graphing subsystem
        if message_type == MessageType.DATA_CONTAINER:

            # TODO: Purge message from fields to be used as tags.
            #       Namely:
            #       'geohash',
            #       'location', 'location_id', 'location_name', 'sensor_id', 'sensor_type',
            #       'latitude', 'longitude', 'lat', 'lon'

            for graphing_subsystem in self.graphing:

                # Mix in references to each other. A bit of a hack, but okay for now :-).
                graphing_subsystem.strategy = self.strategy

                subsystem_name = graphing_subsystem.__class__.__name__
                log.debug(u'Provisioning Grafana with {name}', name=subsystem_name)
                try:
                    graphing_subsystem.provision(storage_location, message, topology=topology)
                except Exception as ex:
                    log.failure(u'Grafana provisioning failed for storage={storage}, message={message}:\n{log_failure}',
                                storage=storage_location, message=message, level=LogLevel.error)

                    # MQTT error signalling
                    failure = Failure()
                    self.mqtt_publish_error(failure, topic, payload)

        return True
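    # A worked example of the discrete-value conversion above. Given the
    # hypothetical topic 'mqttkit-1/network/gateway/node/data/temperature'
    # with payload '22.5', the topology slot 'data/temperature' yields:
    #
    #   name    = 'temperature'
    #   value   = 22.5
    #   message = {'temperature': 22.5}
    #
    # i.e. the single reading is normalized into the same container format
    # as an en-bloc 'data.json' message.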
    def store_message(self, storage, data):
        """
        Store data to the timeseries database.

        :param storage: The storage location object
        :param data:    The data ready for storing
        """
        self.influx.write(storage, data)

    def mqtt_process_error(self, failure, topic, payload):
        """
        Failure handling.

        :param failure: Failure object from Twisted
        :param topic:   Full MQTT topic
        :param payload: Raw MQTT payload
        """

        # Log failure
        log.failure(u'Processing MQTT message failed from topic "{topic}":\n{log_failure}',
                    topic=topic, failure=failure, level=LogLevel.error)

        # MQTT error signalling
        self.mqtt_publish_error(failure, topic, payload)

    def mqtt_exception(self, failure, topic, payload):
        log.failure(u'Problem publishing error message:\n{log_failure}', failure=failure, level=LogLevel.warn)

    def mqtt_publish_error(self, failure, topic, payload):
        """
        Error signalling over MQTT to the "error.json" topic suffix.

        :param failure: Failure object from Twisted
        :param topic:   Full MQTT topic
        :param payload: Raw MQTT payload
        """

        # Compute base topic of data acquisition channel
        basetopic = self.get_basetopic(topic)
        log.debug(u'Channel base topic is {basetopic}', basetopic=basetopic)

        # Effective error reporting topic
        error_topic = basetopic + '/' + 'error.json'

        error = {
            'type': unicode(failure.type),
            'message': failure.getErrorMessage(),
            'description': u'Error processing MQTT message "{payload}" from topic "{topic}".'.format(topic=topic, payload=payload),
            'timestamp': arrow.utcnow().format('YYYY-MM-DDTHH:mm:ssZZ'),
            #'failure': unicode(failure),
        }
        message = json.dumps(error, indent=4)

        # Publish error signal over MQTT
        #log.debug('Publishing error message to topic {topic}: {message}', topic=error_topic, message=message)
        self.mqtt_service.publish(error_topic, message)
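    # A hedged sketch of the error signal published above. For a failing
    # message on the hypothetical topic 'mqttkit-1/network/gateway/node/data.json',
    # the report goes to 'mqttkit-1/network/gateway/node/error.json' with a
    # JSON payload along these lines (values are illustrative):
    #
    #   {
    #       "type": "<type 'exceptions.ValueError'>",
    #       "message": "could not convert string to float: foo",
    #       "description": "Error processing MQTT message \"foo\" from topic \"mqttkit-1/network/gateway/node/data.json\".",
    #       "timestamp": "2016-06-26T12:34:56+00:00"
    #   }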
    def process_metrics(self):

        metrics = []

        # Compute frequency of measurements
        if 'packet_time' in self.metrics and self.metrics['packet_time'] is not None:

            self.metrics.setdefault('packet_starttime', self.metrics.packet_time)

            # Convert nanos to seconds
            packet_duration = (self.metrics.packet_time - self.metrics.packet_starttime) / 1000.0 / 1000.0 / 1000.0
            packet_duration = packet_duration or self.metrics.starttime
            if packet_duration != 0:
                packet_frequency = self.metrics.tx_count / float(packet_duration)
            else:
                packet_frequency = 0.0

            metrics.append('measurements: %.02f Hz' % packet_frequency)

            # Reset for next round
            self.metrics.packet_starttime = self.metrics.packet_time

        # Compute frequency of transactions
        now = time.time()
        transaction_duration = now - self.metrics.starttime
        if transaction_duration != 0:
            transaction_frequency = self.metrics.tx_count / float(transaction_duration)
        else:
            transaction_frequency = 0.0

        metrics.append('transactions: %.02f tps' % transaction_frequency)

        # Reset for next round
        self.metrics.tx_count = 0
        self.metrics.starttime = now

        # Add information from the Twisted reactor
        pending_calls = reactor.getDelayedCalls()
        pending_count = len(pending_calls)
        #metrics.append('pending: %d' % pending_count)

        metrics_info = ', '.join(metrics)
        log.info('[{realm:12s}] {metrics_info}', realm=self.channel.realm, metrics_info=metrics_info)
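# A worked example of the transaction metric above: with the default
# metrics_logger_interval of 60 seconds, a channel that processed 300
# messages during one interval logs 300 / 60.0 = 5.00 tps, e.g.
#
#   [mqttkit-1   ] transactions: 5.00 tps
#
# (realm name and message count are hypothetical).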