Example #1
    def classify(self):

        log.debug(u'Classifying beekeeper fields {fieldnames}', fieldnames=self.fieldnames)

        # TODO: Can we account for multiple occurrences of "weightX" fields for mapping more than one scale?

        weight_synonyms = u'(weight|wght|gewicht)'
        temperature_synonyms = u'(temperature|temp|temperatur)'
        outside_synonyms = u'(outside|out|air|außen|aussen)'

        candidates = {
            'weight_total': [
                self.from_words(weight_synonyms, 'total', exclude=['stddev']),
                self.from_words(weight_synonyms, exclude=['stddev']),
            ],
            'temperature_outside': [
                self.from_words(temperature_synonyms, outside_synonyms),
                self.from_words(temperature_synonyms),
            ],
            'temperature_inside': [
                self.from_words(temperature_synonyms, 'inside'),
                self.from_words(temperature_synonyms),
            ],
        }
        #pprint(candidates)

        results = SmartBunch()
        for name, patterns in candidates.items():
            fieldname = self.find_match(patterns)
            if fieldname is not None:
                results[name] = fieldname

        log.info(u'Classified beekeeper fields "{fields}" from "{fieldnames}"', fields=results.dump(), fieldnames=self.fieldnames)

        return results
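The helpers `from_words` and `find_match` are not shown in this snippet. A minimal standalone sketch of the same synonym-matching idea, with hypothetical stand-ins for both helpers, could look like this:

    import re

    def from_words(*words, exclude=()):
        # Hypothetical stand-in: require all words in order, case-insensitively,
        # and reject field names containing any excluded word.
        pattern = re.compile('.*'.join(words), re.IGNORECASE)
        excludes = [re.compile(word, re.IGNORECASE) for word in exclude]
        return pattern, excludes

    def find_match(fieldnames, patterns):
        # Return the first field name satisfying any candidate pattern.
        for pattern, excludes in patterns:
            for fieldname in fieldnames:
                if pattern.search(fieldname) and not any(e.search(fieldname) for e in excludes):
                    return fieldname
        return None

    fieldnames = ['weight_stddev', 'weight_total', 'temp_outside']
    patterns = [from_words('(weight|wght|gewicht)', 'total', exclude=['stddev'])]
    print(find_match(fieldnames, patterns))  # => 'weight_total'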
Example #2
    def topic_to_topology(self, topic):
        """
        Decode MQTT topic segments implementing the »basic strategy«.

        The topology hierarchy is directly specified by the MQTT topic and is
        made up of two path segments::

            realm / node

        The topology identifiers are specified as:

            - "realm" is the designated root realm. You should prefix the topic name
              with this label when opting in for all features of the telemetry platform.
              For other purposes, feel free to publish to any MQTT topic you like.

            - "node" is the node identifier. Choose anything you like. This usually
              gets transmitted from an embedded device node.
        """

        # decode the topic
        m = self.matcher.match(topic)
        if m:
            address = SmartBunch(m.groupdict())
        else:
            address = {}

        return address
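`self.matcher` is prepared elsewhere in the class; for the two-segment »basic strategy«, a regular expression along the following lines would fit. This is an assumption, mirroring the quadruple variant shown in a later example:

    import re

    # Hypothetical matcher for the two-segment "realm/node" hierarchy.
    matcher = re.compile(r'^(?P<realm>.+?)/(?P<node>.+?)$')
    match = matcher.match('hiveeyes/testdrive')
    print(match.groupdict())  # => {'realm': 'hiveeyes', 'node': 'testdrive'}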
Example #3
    def topology_to_storage(self, topology, message_type=None):
        """
        Encode topology segment identifiers to database address.

        A database server usually has the concept of multiple databases,
        each with multiple tables. In databases other than RDBMS, these
        entities may be named differently, but the general concept is the
        same.

        The topology information (realm, node) is mapped to a database
        name and a measurement name to compute the storage location for
        measurements:

            - realm + node     = database name
            - sensors | events = table name

        """

        # Derive database table suffix from message type.
        table_suffix = self.get_table_suffix(topology, message_type)

        # Use topology information as blueprint for storage address.
        storage = SmartBunch(topology)

        # Format and sanitize all input parameters used for database addressing.
        # Todo: Investigate using tags in addition to / instead of only "storage.measurement".
        sanitize = self.sanitize_db_identifier
        storage.label = sanitize(storage.node)
        storage.database = sanitize('{}_{}'.format(storage.realm,
                                                   storage.node))
        storage.measurement = sanitize(table_suffix)
        storage.measurement_events = sanitize('events')

        return storage
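For illustration, a topology of realm "hiveeyes" and node "testdrive" with a table suffix of "sensors" (hypothetical values) would resolve to addresses like these, using a stand-in for `sanitize_db_identifier`:

    def sanitize(value):
        # Hypothetical stand-in for sanitize_db_identifier.
        return value.replace('/', '_').replace('.', '_').replace('-', '_').lower()

    realm, node, table_suffix = 'hiveeyes', 'testdrive', 'sensors'
    print(sanitize(node))                         # label:       'testdrive'
    print(sanitize('{}_{}'.format(realm, node)))  # database:    'hiveeyes_testdrive'
    print(sanitize(table_suffix))                 # measurement: 'sensors'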
Example #4
    def topology_to_storage(self, topology):
        """
        Encode topology segment identifiers to database address.

        A database server usually has the concept of multiple databases,
        each with multiple tables. In databases other than RDBMS, these
        entities may be named differently, but the general concept is the
        same.

        Mapping the topology quadruple (realm, network, gateway, node) as:

            - realm + network = database name
            - gateway + node  = table name

        provides a natural scheme for computing the slot where measurements
        are stored.

        """

        # TODO: Investigate using tags in addition to / instead of database.measurement

        # data: Regular endpoint
        # loop: WeeWX (TODO: Move to specific vendor configuration.)
        if topology.slot.startswith('data') or topology.slot.startswith('loop'):
            suffix = 'sensors'
        elif topology.slot.startswith('event'):
            suffix = 'events'
        else:
            suffix = 'unknown'

        # Use topology information as blueprint for storage address
        storage = SmartBunch(topology)

        # Format and sanitize all input parameters used for database addressing
        sanitize = self.sanitize_db_identifier
        storage.label       = sanitize('{}-{}'.format(storage.gateway, storage.node))
        storage.database    = sanitize('{}_{}'.format(storage.realm, storage.network))
        storage.measurement = sanitize('{}_{}_{}'.format(storage.gateway, storage.node, suffix))
        storage.measurement_events = sanitize('{}_{}_{}'.format(storage.gateway, storage.node, 'events'))

        return storage
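As a worked example, a hypothetical quadruple maps to storage addresses like this, again with a stand-in sanitizer:

    def sanitize(value):
        # Hypothetical stand-in for sanitize_db_identifier.
        return value.replace('-', '_').lower()

    realm, network, gateway, node = 'hiveeyes', 'testdrive', 'area-42', 'node-1'
    suffix = 'sensors'  # The slot started with 'data'.

    print(sanitize('{}-{}'.format(gateway, node)))             # label:       'area_42_node_1'
    print(sanitize('{}_{}'.format(realm, network)))            # database:    'hiveeyes_testdrive'
    print(sanitize('{}_{}_{}'.format(gateway, node, suffix)))  # measurement: 'area_42_node_1_sensors'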
Example #5
    def topic_to_topology(self, topic):
        """
        Decode MQTT topic segments implementing the »quadruple hierarchy strategy«.

        The topology hierarchy is directly specified by the MQTT topic and is
        made up of a minimum of four identifiers describing the core structure::

            realm / network / gateway / node

        The topology identifiers are specified as:

            - "realm" is the designated root realm. You should prefix the topic name
              with this label when opting in for all features of the telemetry platform.
              For other purposes, feel free to publish to any MQTT topic you like.

            - "network" is your personal realm. Choose anything you like or use an
              `Online GUID Generator <https://www.guidgenerator.com/>`_ to gain
              maximum uniqueness.

            - "gateway" is your gateway identifier. Choose anything you like.
              This does not have to be very unique, so you might use labels
              having the names of sites. While you are the owner of this
              namespace hierarchy, remember these labels might be visible on
              the collaborative ether, though.
              So the best thing would be to give kind of nicknames to your
              sites which don't identify their location.

            - "1" is your node identifier. Choose anything you like. This usually
              gets transmitted from an embedded device node. Remember one device node
              might have multiple sensors attached, which is beyond the scope of the
              collection platform: We just accept bunches of named measurement values,
              no matter which sensors they might originate from.
              In other words: We don't need nor favor numeric sensor identifiers,
              let's give them names!
        """

        # regular expression pattern for decoding MQTT topic address segments
        pattern = r'^(?P<realm>.+?)/(?P<network>.+?)/(?P<gateway>.+?)/(?P<node>.+?)(?:/(?P<slot>.+?))?$'

        # decode the topic
        p = re.compile(pattern)
        m = p.match(topic)
        if m:
            address = SmartBunch(m.groupdict())
        else:
            address = {}

        return address
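For example, decoding a topic with all five segments present (hypothetical values):

    import re

    pattern = r'^(?P<realm>.+?)/(?P<network>.+?)/(?P<gateway>.+?)/(?P<node>.+?)(?:/(?P<slot>.+?))?$'
    match = re.match(pattern, 'hiveeyes/testdrive/area-42/node-1/data.json')
    print(match.groupdict())
    # => {'realm': 'hiveeyes', 'network': 'testdrive', 'gateway': 'area-42',
    #     'node': 'node-1', 'slot': 'data.json'}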
Example #6
    def get_dashboard_identity(self, storage_location, topology=None):

        # Compute effective topology information
        topology = topology or {}
        realm = topology.get('realm', 'default')
        network = topology.get('network', storage_location.database)

        # Derive dashboard uid and name from topology information
        identity = SmartBunch(
            uid=u'{realm}-{network}-instant'.format(realm=realm, network=network),
            name=u'{realm}-{network}'.format(realm=realm, network=network),
            title=u'{realm}-{network}'.format(realm=realm, network=network),
            # TODO: Use real title after fully upgrading to new Grafana API (i.e. don't use get-by-slug anymore!)
            #title=u'Hiveeyes Rohdaten im Netzwerk ' + network,
        )
        #print identity.prettify()

        return identity
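For example, with realm "hiveeyes" and network "testdrive" (hypothetical values), the derived identifiers come out as:

    realm, network = 'hiveeyes', 'testdrive'
    print(u'{realm}-{network}-instant'.format(realm=realm, network=network))  # uid:  'hiveeyes-testdrive-instant'
    print(u'{realm}-{network}'.format(realm=realm, network=network))          # name: 'hiveeyes-testdrive'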
Example #7
    def get_dashboard_identity(self, storage_location, topology=None):

        # Compute effective topology information
        topology = topology or {}
        realm = topology.get('realm', 'default')
        network = topology.get('network', storage_location.database)

        # Derive dashboard uid and name from topology information
        # Note: "nodename" is only used by the commented-out title variants below.
        # Use .get() so a plain dict default for "topology" does not raise AttributeError.
        nodename = u'{gateway} / {node}'.format(
            gateway=topology.get('gateway'), node=topology.get('node'))
        identity = SmartBunch(
            #uid=u'{realm}-{network}-instant'.format(realm=realm, network=network),
            name=self.strategy.topology_to_label(topology),
            title=self.strategy.topology_to_label(topology),
            # TODO: Use real title after fully upgrading to new Grafana API (i.e. don't use get-by-slug anymore!)
            # title=u'Hiveeyes Umweltcockpit für Meßknoten {nodename} im Netzwerk {network}'.format(nodename=nodename, network=network),
            # title=u'Hiveeyes Ertragscockpit für Meßknoten {nodename} im Netzwerk {network}'.format(nodename=nodename, network=network),
        )
        #print identity.prettify()

        return identity
Example #8
    def topology_to_storage(self, topology):
        """
        Encode topology segment identifiers to database address.

        A database server usually has the concept of multiple databases,
        each with multiple tables. In databases other than RDBMS, these
        entities may be named differently, but the general concept is the
        same.

        Mapping the topology quadruple (realm, network, gateway, node) as:

            - realm + network = database name
            - gateway + node  = table name

        provides a natural scheme for computing the slot where measurements
        are stored.

        """

        # Todo: Investigate using tags in addition to / instead of database.measurement
        # Todo: Move specific stuff about WeeWX or Tasmota to some device-specific knowledgebase.

        # data:     Regular endpoint
        # loop:     WeeWX
        # SENSOR:   Sonoff-Tasmota
        if topology.slot.startswith('data') or topology.slot.startswith('loop') \
                or topology.slot.endswith('SENSOR') or topology.slot.endswith('STATE'):
            suffix = 'sensors'

        elif topology.slot.startswith('event'):
            suffix = 'events'

        else:
            suffix = 'unknown'

        # Use topology information as blueprint for storage address
        storage = SmartBunch(topology)

        # Format and sanitize all input parameters used for database addressing
        sanitize = self.sanitize_db_identifier
        storage.label = sanitize('{}-{}'.format(storage.gateway, storage.node))
        storage.database = sanitize('{}_{}'.format(storage.realm,
                                                   storage.network))
        storage.measurement = sanitize('{}_{}_{}'.format(
            storage.gateway, storage.node, suffix))
        storage.measurement_events = sanitize('{}_{}_{}'.format(
            storage.gateway, storage.node, 'events'))

        return storage
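The slot classification lends itself to being exercised in isolation; a sketch with the conditions above extracted into a hypothetical helper:

    def slot_to_suffix(slot):
        # Hypothetical extraction of the slot classification shown above.
        if slot.startswith(('data', 'loop')) or slot.endswith(('SENSOR', 'STATE')):
            return 'sensors'
        if slot.startswith('event'):
            return 'events'
        return 'unknown'

    print(slot_to_suffix('data.json'))    # => 'sensors'
    print(slot_to_suffix('tele/SENSOR'))  # => 'sensors' (Sonoff-Tasmota)
    print(slot_to_suffix('event.json'))   # => 'events'
    print(slot_to_suffix('foo'))          # => 'unknown'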
Example #9
    def get_dashboard_identity(self, storage_location, topology=None):

        # Compute effective topology information
        topology = topology or {}
        realm = topology.get('realm', 'default')

        if 'network' in topology:
            name = topology.network
        else:
            name = topology.node

        # Derive dashboard uid and name from topology information
        identity = SmartBunch(
            #uid=u'{realm}-{name}-instant'.format(realm=realm, name=name),
            name=u'{realm}-{name}'.format(realm=realm, name=name),
            title=u'{realm}-{name}'.format(realm=realm, name=name),
            # TODO: Use real title after fully upgrading to new Grafana API (i.e. don't use get-by-slug anymore!)
            #title=u'Raw data for realm={realm} network={network}'.format(realm=realm, network=network),
        )
        #print identity.prettify()

        return identity
Example #10
    def topic_to_topology(self, topic):
        """
        Decode MQTT topic segments implementing the »quadruple hierarchy strategy«.

        The topology hierarchy is directly specified by the MQTT topic and is
        made up of a minimum of four identifiers describing the core structure::

            realm / network / gateway / node

        The topology identifiers are specified as:

            - "realm" is the designated root realm. You should prefix the topic name
              with this label when opting in for all features of the telemetry platform.
              For other purposes, feel free to publish to any MQTT topic you like.

            - "network" is your personal realm. Choose anything you like or use an
              `Online GUID Generator <https://www.guidgenerator.com/>`_ to gain
              maximum uniqueness.

            - "gateway" is your gateway identifier. Choose anything you like.
              This does not have to be very unique, so you might use labels
              having the names of sites. While you are the owner of this
              namespace hierarchy, remember these labels might be visible on
              the collaborative ether, though. You might want to assign nicknames
              to your sites to not identify their location.

            - "node" is the node identifier. Choose anything you like. This usually
              gets transmitted from an embedded device node.
        """

        # Decode the topic.
        m = self.matcher.match(topic)
        if m:
            address = SmartBunch(m.groupdict())
        else:
            address = {}

        return address
Example #11
    def tame_refresh_interval(self, preset='standard', force=False):
        """
        Tame refresh interval for all dashboards.

        :param preset: Which taming preset to use. Currently, only "standard"
                       is implemented, which is also the default preset.
        :param force: When true, also tame dashboards which have been
                      modified recently.

        Introduction
        ------------
        The default dashboard refresh interval of 5 seconds is important
        for instant-on workbench operations. However, once a sensor node is
        deployed in the field, its update interval is usually more like
        5 minutes.

        Problem
        -------
        Having high refresh rates on many dashboards can increase the overall
        system usage significantly, depending on how many users are displaying
        them in their browsers and the complexity of the database queries
        issued when rendering the dashboard.

        Solution
        --------
        In order to reduce the overall load on the data acquisition system,
        the refresh interval of dashboards which have not been updated within
        a configurable threshold period is decreased according to the rules
        of built-in presets.

        The default "standard" preset currently implements the following rules:

        - Leave dashboards which have been updated within the last 14 days completely untouched
        - Apply a refresh interval of 5 minutes for all dashboards having the "live" tag
        - Completely disable refreshing for all dashboards having the "historical" tag
        - Apply a refresh interval of 30 minutes for all other dashboards

        """

        dashboard_list = self.grafana_api.get_dashboards()

        log.info(
            'Taming dashboard refresh interval with preset="{preset}" for {count} dashboards',
            preset=preset,
            count=len(dashboard_list))

        # Date of 14 days in the past
        before_14_days = arrow.utcnow().shift(days=-14)

        for dashboard_meta in dashboard_list:

            dashboard_meta = SmartBunch.bunchify(dashboard_meta)
            #print dashboard_meta.prettify()

            whoami = u'title="{title}", uid="{uid}"'.format(
                title=dashboard_meta['title'], uid=dashboard_meta['uid'])

            # Request dashboard by uid
            dashboard_uid = dashboard_meta['uid']
            response = self.grafana_api.get_dashboard_by_uid(dashboard_uid)
            response = SmartBunch.bunchify(response)

            # Get effective dashboard information from response
            folder_id = response.meta.folderId
            dashboard = response.dashboard

            # Compute new dashboard refresh interval by applying taming rules.
            # Interval units: M (months), w (weeks), d (days), h (hours),
            # m (minutes), s (seconds), y (years).

            # 1. Check dashboard modification time against threshold
            modification_time = arrow.get(response.meta.updated)
            if not force and modification_time > before_14_days:
                log.debug(
                    'Skip taming dashboard with {whoami}, it has recently been modified',
                    whoami=whoami)
                continue

            # 2. Apply refresh interval by looking at the dashboard tags
            if 'live' in dashboard_meta.tags:
                refresh_interval = '5m'
            elif 'historical' in dashboard_meta.tags:
                refresh_interval = None
            else:
                refresh_interval = '30m'

            # Skip update procedure if refresh interval hasn't changed at all
            if refresh_interval == dashboard.refresh:
                continue

            # Set new refresh interval
            dashboard.refresh = refresh_interval

            # Update dashboard
            log.debug(
                'Taming dashboard with {whoami} to refresh interval of {interval}',
                whoami=whoami,
                interval=refresh_interval)
            response = self.grafana_api.grafana_client.dashboards.db.create(
                dashboard=dashboard, folderId=folder_id)

            # Report about the outcome
            if response['status'] == 'success':
                log.info('Successfully tamed dashboard with {whoami}',
                         whoami=whoami)
            else:
                log.warn('Failed taming dashboard with {whoami}',
                         whoami=whoami)
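The tag-based taming rules boil down to a pure function; a minimal sketch, where the extraction into a helper is an assumption and not part of the original code:

    def compute_refresh_interval(tags):
        # Map dashboard tags to a refresh interval, following the "standard" preset.
        if 'live' in tags:
            return '5m'
        if 'historical' in tags:
            return None  # Disable refreshing entirely.
        return '30m'

    assert compute_refresh_interval(['live']) == '5m'
    assert compute_refresh_interval(['historical']) is None
    assert compute_refresh_interval(['misc']) == '30m'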