def _needs_upload_to_cloud(self, metric):
    """Decide whether the given metric must be forwarded to the Cloud.

    A metric is uploaded only when a definition exists for its source/type,
    the Cloud connection is enabled, and the source/type passes the
    configured whitelists.
    """
    source, mtype = metric['source'], metric['type']
    # The definitions lookup for a metric source is case sensitive!
    if self.definitions.get(source, {}).get(mtype) is None:
        return False
    if Config.get_entry('cloud_enabled', False) is False:
        return False
    if source == 'OpenMotics':
        # Per-type enable flag plus the cloud_metrics_types whitelist.
        type_enabled = Config.get_entry('cloud_metrics_enabled|{0}'.format(mtype), True)
        if type_enabled is False or mtype not in Config.get_entry('cloud_metrics_types', []):
            return False
    elif source.lower() not in Config.get_entry('cloud_metrics_sources', []):
        # 3rd party (plugin) sources are whitelisted in lowercase.
        return False
    return True
def _process_configuration_data(self, configuration):
    """Persist a received configuration dict when it differs from the cached one."""
    try:
        if self._configuration != configuration:
            for key, value in configuration.items():
                Config.set_entry(key, value)
            logger.info('Configuration changed: {0}'.format(configuration))
            # Cache so unchanged configurations are skipped next time.
            self._configuration = configuration
    except Exception:
        logger.exception(
            'Unexpected exception processing configuration data')
def _controller_health(self, name, controller, device_reset):
    # type: (str, Union[PowerCommunicator,MasterController], Callable[[],None]) -> None
    """Inspect communicator health and trigger a recovery action when needed."""
    health = controller.get_communicator_health()
    if health == CommunicationStatus.SUCCESS:
        # Healthy again: drop pending recovery state (including the legacy key).
        Config.remove_entry('communication_recovery_{0}'.format(name))
        Config.remove_entry('communication_recovery')
        return
    if health == CommunicationStatus.UNSTABLE:
        logger.warning('Observed unstable communication for %s', name)
        return
    action = self._get_reset_action(name, controller)
    if action is None:
        return
    device_reset()
    if action == 'service':
        # Give the device reset some time before terminating the service.
        time.sleep(15)
        os._exit(1)
def _insert_defaults():
    # type: () -> None
    """ Insert the default configuration values for keys that are not set yet. """
    defaults = {'cloud_enabled': True,
                'cloud_endpoint': 'cloud.openmotics.com',
                'cloud_endpoint_metrics': 'portal/metrics/',
                'cloud_metrics_types': [],
                'cloud_metrics_sources': [],
                'cloud_metrics_enabled|energy': True,
                'cloud_metrics_enabled|counter': True,
                'cloud_metrics_batch_size': 50,
                'cloud_metrics_min_interval': 300,
                'cloud_support': False,
                'cors_enabled': False}
    for key in defaults:
        # Only seed a default when nothing was configured before.
        if Config.get_entry(key, None) is None:
            Config.set_entry(key, defaults[key])
def test_multiple_types(self):
    """ Test different types """
    expected = {'str': 'test', 'int': 37, 'bool': True}
    for key, value in expected.items():
        Config.set_entry(key, value)
    # Each value must round-trip with its original type intact.
    for key, value in expected.items():
        self.assertEqual(Config.get_entry(key, None), value)
def test_set_cloud_interval(self):
    """ Verify set_cloud_interval updates the collector and persists the value. """
    MetricsTest.intervals = {}
    controller = MetricsTest._get_controller(intervals=['energy'])
    controller._refresh_cloud_interval()
    # The default interval is 300 seconds.
    self.assertEqual(MetricsTest.intervals.get('energy'), 300)
    controller.set_cloud_interval('energy', 900)
    self.assertEqual(MetricsTest.intervals.get('energy'), 900)
    self.assertEqual(Config.get_entry('cloud_metrics_interval|energy', 0), 900)
def enqueue_event(self, event):
    """Queue a gateway event for the Cloud when enabled and not filtered out."""
    if Config.get_entry('cloud_enabled', False) is False:
        return
    # Input configuration changes invalidate the enabled-events cache.
    if event.type == GatewayEvent.Types.CONFIG_CHANGE and event.data.get('type') == 'input':
        self._event_enabled_cache = {}
    if not self._should_send_event(event):
        return
    event.data['timestamp'] = time.time()
    self._queue.appendleft(event)
def _get_cherrypy_mounts(self):
    """Build the CherryPy mount configuration for every running plugin runner."""
    cors_enabled = Config.get_entry('cors_enabled', False)
    return [{'root': runner.get_webservice(self._webinterface),
             'script_name': '/plugins/{0}'.format(runner.name),
             'config': {'/': {'tools.sessions.on': False,
                              'tools.trailing_slash.on': False,
                              'tools.cors.on': cors_enabled}}}
            for runner in self._iter_running_runners()]
def test_duplicates(self):
    """ Test overwriting and removing a duplicate setting. """
    Config.set_entry('test', 'test')
    self.assertEqual(Config.get_entry('test', None), 'test')
    # A second write for the same key replaces the previous value.
    Config.set_entry('test', 'test2')
    self.assertEqual(Config.get_entry('test', None), 'test2')
    Config.remove_entry('test')
    self.assertIsNone(Config.get_entry('test', None))
def _collect_debug_dumps(self):
    # type: () -> Tuple[Dict[str, Dict[float, Dict]], List[float]]
    """Collect debug dump info; full dumps are only included with cloud support enabled."""
    dumps = self._get_debug_dumps()
    info = {reference: dump.get('info', {}) for reference, dump in dumps.items()}
    include_full_dumps = Config.get_entry('cloud_support', False)
    payload = {'dumps': dumps if include_full_dumps else {},
               'dump_info': info}  # type: Dict[str, Dict[float, Dict]]
    return payload, list(dumps.keys())
def _beat(self):
    # type: () -> float
    """Perform one heartbeat: collect data, call home and schedule resulting tasks.

    Returns the duration (in seconds) of the call-home request, or 0.0 when
    the Cloud connection is disabled.
    """
    # Check whether connection to the Cloud is enabled/disabled
    self._cloud_enabled = Config.get_entry('cloud_enabled', False)
    if self._cloud_enabled is False:
        # Cloud disabled: back off to the default sleep time and signal
        # both VPN and Cloud as unreachable.
        self._sleep_time = DEFAULT_SLEEP_TIME
        task_data = {'open_vpn': False,
                     'events': [(OMBusEvents.VPN_OPEN, False),
                                (OMBusEvents.CLOUD_REACHABLE, False)]}
        self._task_executor.set_new_tasks(task_data=task_data)
        return 0.0
    # Load collected data from async collectors
    call_data = {}  # type: Dict[str, Dict[str, Any]]
    for collector_key in self._collectors:
        collector = self._collectors[collector_key]
        data = collector.data
        if data is not None:
            call_data[collector_key] = data
    # Load debug data (references are needed later to clear delivered dumps)
    debug_data = self._debug_collector.data
    debug_references = None  # type: Optional[List[float]]
    if debug_data is not None:
        call_data['debug'], debug_references = debug_data
    # Send data to the cloud and load response
    call_home_start = time.time()
    response = self._cloud.call_home(call_data)
    call_home_duration = time.time() - call_home_start
    call_home_successful = response.get('success', False)
    # The Cloud can steer the heartbeat rate through the response.
    self._sleep_time = response.get('sleep_time', DEFAULT_SLEEP_TIME)
    if call_home_successful:
        self._last_successful_heartbeat = time.time()
        # The debug dumps were delivered, so they can be cleared locally.
        self._debug_collector.clear(debug_references)
    # Gather tasks to be executed
    task_data = {'events': [(OMBusEvents.CLOUD_REACHABLE, call_home_successful)],
                 'open_vpn': response.get('open_vpn', True),
                 'connectivity': self._last_successful_heartbeat}
    # Only forward these entries when the Cloud actually sent them.
    for entry in ['configuration', 'intervals']:
        if entry in response:
            task_data[entry] = response[entry]
    self._task_executor.set_new_tasks(task_data=task_data)
    return call_home_duration
def test_empty(self):
    """ Test an empty database. """
    # Nothing stored yet: the fallback must be returned.
    self.assertIsNone(Config.get_entry('test', None))
    Config.set_entry('test', 'test')
    self.assertEqual(Config.get_entry('test', None), 'test')
    Config.remove_entry('test')
    self.assertIsNone(Config.get_entry('test', None))
def _migrate(cls):
    # type: () -> None
    """Migrate settings from the legacy SQLite config database into Config.

    Existing Config entries are overwritten with the SQLite values; the old
    database file is renamed to ``*.bak`` afterwards so this runs only once.
    """
    old_sqlite_db = constants.get_config_database_file()
    if os.path.exists(old_sqlite_db):
        import sqlite3
        connection = sqlite3.connect(old_sqlite_db,
                                     detect_types=sqlite3.PARSE_DECLTYPES,
                                     check_same_thread=False,
                                     isolation_level=None)
        cursor = connection.cursor()
        for row in cursor.execute('SELECT * FROM settings;'):
            # row[1] = setting name, row[2] = serialized data.
            setting = row[1]
            config = Config.get_or_none(Config.setting == setting)
            if config is None:
                config = Config(setting=setting,
                                data=row[2])
                config.save()
            else:
                # Legacy value wins over the current one.
                config.data = row[2]
                config.save()
        os.rename(old_sqlite_db, '{0}.bak'.format(old_sqlite_db))
def _refresh_cloud_interval(self):
    """Re-apply the configured cloud interval for every collected metric type."""
    for mtype in self._metrics_collector.intervals:
        configured = Config.get_entry('cloud_metrics_interval|{0}'.format(mtype), 300)
        # save=False: these values were just read from the configuration.
        self.set_cloud_interval(mtype, configured, save=False)
    self._throttled_down = False
def test_needs_upload(self):
    """Validate the _needs_upload_to_cloud decision logic end-to-end."""
    # 0. the boring stuff
    def load_buffer(before=None):
        _ = before
        return []
    metrics_cache_mock = Mock()
    metrics_cache_mock.load_buffer = load_buffer
    metrics_collector_mock = Mock()
    metrics_collector_mock.intervals = []
    metrics_collector_mock.get_definitions = lambda: []
    # 1. baseline config and definitions
    Config.set_entry('cloud_enabled', True)
    Config.set_entry('cloud_metrics_types', ['counter', 'energy'])
    Config.set_entry('cloud_metrics_sources', ['openmotics'])
    Config.set_entry('cloud_metrics_enabled|energy', True)
    definitions = {'OpenMotics': {'counter': Mock(), 'energy': Mock()}}
    SetUpTestInjections(plugin_controller=Mock(),
                        metrics_collector=metrics_collector_mock,
                        metrics_cache_controller=metrics_cache_mock,
                        gateway_uuid=Mock())
    metrics_controller = MetricsController()
    metrics_controller.definitions = definitions
    # 2. test simple metric (everything enabled/whitelisted -> upload)
    metric = {'source': 'OpenMotics',
              'type': 'energy',
              'timestamp': 1234,
              'tags': {'device': 'OpenMotics energy ID1',
                       'id': 'E7.3'},
              'values': {'counter': 5678,
                         'power': 9012}}
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertTrue(needs_upload)
    # 3. disable the energy metric type, now test again
    Config.set_entry('cloud_metrics_enabled|energy', False)
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertFalse(needs_upload)
    Config.set_entry('cloud_metrics_enabled|energy', True)
    # 4. remove energy from the whitelisted types, now test again
    Config.set_entry('cloud_metrics_types', ['counter'])
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertFalse(needs_upload)
    Config.set_entry('cloud_metrics_types', ['counter', 'energy'])
    # 5. test metric with unconfigured definition (unknown source)
    metric = {'source': 'MBus',
              'type': 'energy',
              'timestamp': 1234,
              'tags': {'device': 'OpenMotics energy ID1',
                       'id': 'E7.3'},
              'values': {'counter': 5678,
                         'power': 9012}}
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertFalse(needs_upload)
    # 6. configure the definition (source still not whitelisted), now test again
    definitions['MBus'] = {'counter': Mock(), 'energy': Mock()}
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertFalse(needs_upload)
    # 7. whitelist the (lowercase) source, now test again
    cnf = Config.get_entry('cloud_metrics_sources', [])
    cnf.append('mbus')
    Config.set_entry('cloud_metrics_sources', cnf)
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertTrue(needs_upload)
    # 8. disable cloud, now test again
    Config.set_entry('cloud_enabled', False)
    needs_upload = metrics_controller._needs_upload_to_cloud(metric)
    self.assertFalse(needs_upload)
def test_metrics_receiver(self):
    """End-to-end test of MetricsController.receiver: queueing, batching,
    retrying, buffering and recovery after a service restart.

    NOTE(review): the exact timestamps/counters assume time is driven by
    fakesleep (each time.sleep advances the clock) — confirm the test class
    installs it.
    """
    Config.set_entry('cloud_endpoint', 'tests.openmotics.com')
    Config.set_entry('cloud_endpoint_metrics', 'metrics')
    Config.set_entry('cloud_metrics_interval|foobar', 5)
    # Add interceptors
    send_metrics = []
    response_data = {}

    def post(url, data, timeout):
        # Intercepts requests.post: records sent metrics, replies with response_data.
        _ = url, timeout
        # Extract metrics, parse assumed data format
        time.sleep(1)
        send_metrics.append([m[0] for m in json.loads(data['metrics'])])
        response = type('response', (), {})()
        response.text = json.dumps(copy.deepcopy(response_data))
        return response

    # Initialize (mocked) classes
    base_metric = {'source': 'OpenMotics',
                   'type': 'foobar',
                   'timestamp': 1,
                   'tags': {'name': 'name',
                            'id': 0},
                   'values': {'counter': 0}}
    requests.post = post
    SetUpTestInjections(metrics_db=':memory:',
                        metrics_db_lock=Lock())
    metrics_cache = MetricsCacheController()
    metrics_collector_mock = Mock()
    metrics_collector_mock.intervals = []
    definitions = [{'type': 'foobar',
                    'tags': ['id', 'name'],
                    'metrics': [{'name': 'counter',
                                 'description': 'Some field',
                                 'type': 'counter',
                                 'policies': ['buffer'],
                                 'unit': ''}]}]
    metrics_collector_mock.get_definitions = lambda: definitions
    SetUpTestInjections(plugin_controller=Mock(),
                        metrics_collector=metrics_collector_mock,
                        metrics_cache_controller=metrics_cache,
                        gateway_uuid='uuid')
    metrics_controller = MetricsController()
    metrics_controller._needs_upload_to_cloud = lambda *args, **kwargs: True
    self.assertEqual(metrics_controller._buffer_counters,
                     {'OpenMotics': {'foobar': {'counter': True}}})

    # Add some helper methods
    def send_metric(counter, error):
        # Prepares the fake cloud response and pushes one metric to the receiver.
        response_data.update({'success': True})
        if error:
            response_data.update({'success': False, 'error': 'error'})
        metric = copy.deepcopy(base_metric)
        # noinspection PyTypeChecker
        metric['timestamp'] = time.time()
        metric['values']['counter'] = counter
        metrics_controller.receiver(metric)
        return metric

    def assert_fields(controller, cache, queue, stats, buffer, last_send, last_try, retry_interval):
        # Verifies the complete internal cloud-state of the controller at once.
        self.assertDictEqual(controller._cloud_cache, cache)
        self.assertListEqual(controller._cloud_queue, queue)
        self.assertDictEqual(controller.cloud_stats, stats)
        self.assertListEqual(controller._cloud_buffer, buffer)
        self.assertEqual(controller._cloud_last_send, last_send)
        self.assertEqual(controller._cloud_last_try, last_try)
        self.assertEqual(controller._cloud_retry_interval, retry_interval)

    # Validate initial state
    assert_fields(metrics_controller,
                  cache={},
                  queue=[],
                  stats={'queue': 0, 'buffer': 0, 'time_ago_send': 0, 'time_ago_try': 0},
                  buffer=[],
                  last_send=0,
                  last_try=0,
                  retry_interval=None)
    logger.info('Send first metrics, but raise exception on "cloud"')
    send_metrics = []
    Config.set_entry('cloud_metrics_batch_size', 0)
    Config.set_entry('cloud_metrics_min_interval', 0)
    time.sleep(10)  # Time moves on inside fakesleep
    metric_1 = send_metric(counter=0, error=True)
    buffer_metric_timestamp = metric_1['timestamp']
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertEqual(len(metrics), 1)
    self.assertDictEqual(metrics.pop(), metric_1)
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 10}}}},
                  queue=[[metric_1]],
                  stats={'queue': 1, 'buffer': 0, 'time_ago_send': 10, 'time_ago_try': 10},
                  # Nothing buffered yet
                  buffer=[],
                  last_send=0,
                  last_try=10,
                  retry_interval=0)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [{'timestamp': buffer_metric_timestamp, 'counter': 0}])
    logger.info('Send another metric, still errors on "cloud"')
    time.sleep(10)  # Time moves on inside fakesleep
    metric_2 = send_metric(counter=1, error=True)
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertEqual(len(metrics), 2)
    self.assertDictEqual(metrics.pop(), metric_2)
    self.assertDictEqual(metrics.pop(), metric_1)
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 20}}}},
                  queue=[[metric_1], [metric_2]],
                  stats={'queue': 2, 'buffer': 1, 'time_ago_send': 21, 'time_ago_try': 11},
                  buffer=[],
                  last_send=0,
                  last_try=21,
                  retry_interval=0)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [{'timestamp': buffer_metric_timestamp, 'counter': 0}])
    logger.info('Send another metric, this time the call is accepted correctly')
    time.sleep(10)  # Time moves on inside fakesleep
    metric_3 = send_metric(counter=2, error=False)
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertEqual(len(metrics), 3)
    self.assertDictEqual(metrics.pop(), metric_3)
    self.assertDictEqual(metrics.pop(), metric_2)
    self.assertDictEqual(metrics.pop(), metric_1)
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 30}}}},
                  queue=[],
                  stats={'queue': 3, 'buffer': 1, 'time_ago_send': 32, 'time_ago_try': 11},
                  # Buffer stats not cleared yet
                  buffer=[],
                  last_send=32,
                  last_try=32,
                  retry_interval=0)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [])
    logger.info('Send another metrics, with increased batch sizes')
    send_metrics = []
    Config.set_entry('cloud_metrics_batch_size', 3)
    Config.set_entry('cloud_metrics_min_interval', 300)
    time.sleep(10)  # Time moves on inside fakesleep
    metric_1 = send_metric(counter=3, error=False)
    time.sleep(1)  # Time moves on inside fakesleep
    send_metric(counter=4, error=False)  # This metric has the same (rounded) timestamp, so should be discarded
    time.sleep(9)  # Time moves on inside fakesleep
    metric_2 = send_metric(counter=5, error=False)
    self.assertEqual(len(send_metrics), 0)  # No metric send, still < batch size
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 50}}}},
                  queue=[[metric_1], [metric_2]],
                  stats={'queue': 2, 'buffer': 0, 'time_ago_send': 21, 'time_ago_try': 21},
                  buffer=[],
                  last_send=32,
                  last_try=32,
                  retry_interval=300)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [])
    time.sleep(10)  # Time moves on inside fakesleep
    metric_3 = send_metric(counter=6, error=False)  # Add another metric, now reaching batch size
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertEqual(len(metrics), 3)
    self.assertDictEqual(metrics.pop(), metric_3)
    self.assertDictEqual(metrics.pop(), metric_2)
    self.assertDictEqual(metrics.pop(), metric_1)
    self.assertEqual(len(metrics), 0)
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 60}}}},
                  queue=[],
                  stats={'queue': 3, 'buffer': 0, 'time_ago_send': 31, 'time_ago_try': 31},
                  buffer=[],
                  last_send=63,
                  last_try=63,
                  retry_interval=300)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [])
    logger.info('Send metric after minimum interval, even though batch size isn\'t reached')
    time.sleep(300)  # Time moves on inside fakesleep
    metric_1 = send_metric(counter=6, error=False)  # Add another metric, now reaching batch size
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertListEqual(metrics, [metric_1])
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 360}}}},
                  queue=[],
                  stats={'queue': 1, 'buffer': 0, 'time_ago_send': 301, 'time_ago_try': 301},
                  buffer=[],
                  last_send=364,
                  last_try=364,
                  retry_interval=300)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [])
    logger.info('Send metric, but raise exception on "cloud"')
    send_metrics = []
    Config.set_entry('cloud_metrics_batch_size', 0)
    time.sleep(10)  # Time moves on inside fakesleep
    metric_1 = send_metric(counter=7, error=True)
    buffer_metric_timestamp = metric_1['timestamp']
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertEqual(len(metrics), 1)
    self.assertDictEqual(metrics.pop(), metric_1)
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 375}}}},
                  queue=[[metric_1]],
                  stats={'queue': 1, 'buffer': 0, 'time_ago_send': 11, 'time_ago_try': 11},
                  # Nothing buffered yet
                  buffer=[],
                  last_send=364,
                  last_try=375,
                  retry_interval=300)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [{'timestamp': buffer_metric_timestamp, 'counter': 7}])
    # Emulate service restart
    metrics_controller = MetricsController()
    metrics_controller._needs_upload_to_cloud = lambda *args, **kwargs: True
    # Validate startup state
    assert_fields(metrics_controller,
                  cache={},
                  queue=[],
                  stats={'queue': 0, 'buffer': 1, 'time_ago_send': 0, 'time_ago_try': 0},
                  buffer=[[metric_1]],
                  last_send=376,
                  last_try=376,
                  retry_interval=None)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [{'timestamp': buffer_metric_timestamp, 'counter': 7}])
    logger.info('Send another metric which should result in sending queue en buffer')
    time.sleep(10)  # Time moves on inside fakesleep
    metric_2 = send_metric(counter=8, error=False)
    self.assertEqual(len(send_metrics), 1)
    metrics = send_metrics.pop()
    self.assertEqual(len(metrics), 2)
    self.assertDictEqual(metrics.pop(), metric_2)
    self.assertDictEqual(metrics.pop(), metric_1)
    assert_fields(metrics_controller,
                  cache={'OpenMotics': {'foobar': {'id=0|name=name': {'timestamp': 385}}}},
                  queue=[],
                  stats={'queue': 1, 'buffer': 1, 'time_ago_send': 10, 'time_ago_try': 10},
                  buffer=[],
                  last_send=386,
                  last_try=386,
                  retry_interval=300)
    buffered_metrics = MetricsTest._load_buffered_metrics(metrics_cache)
    self.assertEqual(buffered_metrics, [])
def set_cloud_interval(self, metric_type, interval, save=True):
    """Apply a new cloud upload interval for the given metric type.

    When ``save`` is True, the interval is also persisted in the configuration.
    """
    logger.info('Setting cloud interval {0}_{1}'.format(metric_type, interval))
    self._metrics_collector.set_cloud_interval(metric_type, interval)
    if not save:
        return
    Config.set_entry('cloud_metrics_interval|{0}'.format(metric_type), interval)
def test_delete_non_existing(self):
    """ Removing an absent key is a no-op and leaves other keys untouched. """
    Config.set_entry('str', 'test')
    Config.set_entry('int', 37)
    Config.set_entry('bool', True)
    # First removal deletes the key...
    Config.remove_entry('str')
    self.assertIsNone(Config.get_entry('str', None))
    # ...the second removal (key no longer exists) must not raise.
    Config.remove_entry('str')
    self.assertIsNone(Config.get_entry('str', None))
    # Unrelated keys are unaffected.
    self.assertEqual(Config.get_entry('int', None), 37)
    self.assertEqual(Config.get_entry('bool', None), True)
def receiver(self, metric):
    # type: (Dict[str,Any]) -> None
    """ Collects all metrics made available by the MetricsCollector and the plugins.
    These metrics are cached locally for configurable (and optional) pushing metrics to the Cloud.
    > example_definition = {"type": "energy",
    >                       "tags": ["device", "id"],
    >                       "metrics": [{"name": "power",
    >                                    "description": "Total energy consumed (in kWh)",
    >                                    "type": "counter",
    >                                    "unit": "kWh"}]}
    > example_metric = {"source": "OpenMotics",
    >                   "type": "energy",
    >                   "timestamp": 1497677091,
    >                   "tags": {"device": "OpenMotics energy ID1",
    >                            "id": "E7.3"},
    >                   "values": {"power": 1234}}
    """
    metric_type = metric['type']
    metric_source = metric['source']
    if not self._needs_upload_to_cloud(metric):
        return
    if metric_source == 'OpenMotics':
        # round off timestamps for openmotics metrics
        modulo_interval = Config.get_entry('cloud_metrics_interval|{0}'.format(metric_type), 900)
        timestamp = int(metric['timestamp'] - metric['timestamp'] % modulo_interval)
    else:
        timestamp = int(metric['timestamp'])
    cloud_batch_size = Config.get_entry('cloud_metrics_batch_size', 0)
    cloud_min_interval = Config.get_entry('cloud_metrics_min_interval', None)  # type: Optional[int]
    if cloud_min_interval is not None:
        self._cloud_retry_interval = cloud_min_interval
    endpoint = Config.get_entry('cloud_endpoint', None)  # type: Optional[str]
    if endpoint is None:
        # No endpoint configured: nothing can be uploaded.
        return
    metrics_endpoint = '{0}/{1}?uuid={2}'.format(
        endpoint if endpoint.startswith('http') else 'https://{0}'.format(endpoint),
        Config.get_entry('cloud_endpoint_metrics', ''),
        self._gateway_uuid
    )
    counters_to_buffer = self._buffer_counters.get(metric_source, {}).get(metric_type, {})
    definition = self.definitions.get(metric_source, {}).get(metric_type)
    # Unique identifier for this metric instance, built from its sorted tags.
    identifier = '|'.join(['{0}={1}'.format(tag, metric['tags'][tag])
                           for tag in sorted(definition['tags'])])
    # Check if the metric needs to be send
    entry = self._cloud_cache.setdefault(metric_source, {}).setdefault(metric_type, {}).setdefault(identifier, {})
    include_this_metric = False
    if 'timestamp' not in entry:
        include_this_metric = True
    else:
        old_timestamp = entry['timestamp']
        if old_timestamp < timestamp:
            # Only newer (rounded) timestamps are included; duplicates are dropped.
            include_this_metric = True
    # Add metrics to the send queue if they need to be send
    if include_this_metric is True:
        entry['timestamp'] = timestamp
        self._cloud_queue.append([metric])
        self._cloud_queue = self._cloud_queue[-5000:]  # 5k metrics buffer
    # Check timings/rates
    now = time.time()
    time_ago_send = int(now - self._cloud_last_send)
    time_ago_try = int(now - self._cloud_last_try)
    outstanding_data_length = len(self._cloud_buffer) + len(self._cloud_queue)
    send = False
    if outstanding_data_length > 0:  # There must be outstanding data
        # Last send was successful, but the buffer length > batch size
        send |= outstanding_data_length >= cloud_batch_size and time_ago_send == time_ago_try
        if cloud_min_interval is not None:
            # Last send was successful, but it has been too long ago
            send |= time_ago_send > cloud_min_interval and time_ago_send == time_ago_try
        if self._cloud_retry_interval is not None:
            # Last send was unsuccessful, and it has been a while
            send |= time_ago_send > time_ago_try > self._cloud_retry_interval
    self.cloud_stats['queue'] = len(self._cloud_queue)
    self.cloud_stats['buffer'] = self._cloud_buffer_length
    self.cloud_stats['time_ago_send'] = time_ago_send
    self.cloud_stats['time_ago_try'] = time_ago_try
    if send is True:
        self._cloud_last_try = now
        try:
            # Try to send the metrics
            request = requests.post(metrics_endpoint,
                                    data={'metrics': json.dumps(self._cloud_buffer + self._cloud_queue)},
                                    timeout=30.0)
            return_data = json.loads(request.text)
            if return_data.get('success', False) is False:
                raise RuntimeError('{0}'.format(return_data.get('error')))
            # If successful; clear buffers
            if self._metrics_cache_controller.clear_buffer(metric['timestamp']) > 0:
                self._load_cloud_buffer()
            self._cloud_queue = []
            self._cloud_last_send = now
            self._cloud_retry_interval = cloud_min_interval
            if self._throttled_down:
                # Sending works again, restore the configured intervals.
                self._refresh_cloud_interval()
        except Exception as ex:
            logger.error('Error sending metrics to Cloud: {0}'.format(ex))
            if time_ago_send > 60 * 60:
                # Decrease metrics rate, but at least every 2 hours
                # Decrease cloud try interval, but at least every hour
                if time_ago_send < 6 * 60 * 60:
                    self._cloud_retry_interval = 15 * 60
                    new_interval = 30 * 60
                elif time_ago_send < 24 * 60 * 60:
                    self._cloud_retry_interval = 30 * 60
                    new_interval = 60 * 60
                else:
                    self._cloud_retry_interval = 60 * 60
                    new_interval = 2 * 60 * 60
                self._throttled_down = True
                metric_types = Config.get_entry('cloud_metrics_types', [])  # type: List[str]
                for mtype in metric_types:
                    self.set_cloud_interval(mtype, new_interval, save=False)
    # Buffer metrics if appropriate
    time_ago_send = int(now - self._cloud_last_send)
    time_ago_try = int(now - self._cloud_last_try)
    if time_ago_send > time_ago_try and include_this_metric is True and len(counters_to_buffer) > 0:
        # Last send failed: persist the counters locally for a later retry.
        cache_data = {}
        for counter, match_setting in six.iteritems(counters_to_buffer):
            if match_setting is not True:
                if metric['tags'][match_setting['key']] not in match_setting['matches']:
                    continue
            cache_data[counter] = metric['values'][counter]
        if self._metrics_cache_controller.buffer_counter(metric_source, metric_type, metric['tags'], cache_data, metric['timestamp']):
            self._cloud_buffer_length += 1
    # Drop buffered entries older than a year.
    if self._metrics_cache_controller.clear_buffer(time.time() - 365 * 24 * 60 * 60) > 0:
        self._load_cloud_buffer()
def _get_reset_action(self, name, controller):
    # type: (str, Union[MasterController,PowerCommunicator]) -> Optional[str]
    """Determine the recovery action for a failing controller.

    Returns 'device' (reset the device), 'service' (restart the service) or
    None. Attempts and backoff state are persisted per controller under the
    'communication_recovery_<name>' configuration key.
    """
    recovery_data_key = 'communication_recovery_{0}'.format(name)
    recovery_data = Config.get_entry(recovery_data_key, None)  # type: Optional[Dict[str, Any]]
    if recovery_data is None:
        # Make mypy happy
        recovery_data = {}
    stats = controller.get_communication_statistics()
    # Copy the lists so later mutations of the stats don't affect the dump.
    calls_timedout = [call for call in stats['calls_timedout']]
    calls_succeeded = [call for call in stats['calls_succeeded']]
    service_restart = None
    device_reset = None
    backoff = 300
    max_attempts = 3
    last_device_reset = recovery_data.get('device_reset')
    last_service_restart = recovery_data.get('service_restart')
    if len(recovery_data) == 0:
        # No recovery history yet: start with a device reset.
        device_reset = 'communication_errors'
    else:
        backoff = 0 if last_device_reset is None else last_device_reset.get('backoff', backoff)
        if last_device_reset is None or last_device_reset['time'] < time.time() - backoff:
            # Backoff for the previous device reset expired: reset again.
            device_reset = 'communication_errors'
            backoff = min(1200, backoff * 2)  # exponential backoff, capped at 20 minutes
        else:
            # Device reset didn't help recently: escalate to a service restart.
            if last_service_restart is None:
                service_restart = 'communication_errors'
            else:
                backoff = last_service_restart.get('backoff', backoff)
                if last_service_restart['time'] < time.time() - backoff:
                    service_restart = 'communication_errors'
                    backoff = min(1200, backoff * 2)
    if service_restart is not None or device_reset is not None:
        # Log debug information
        try:
            debug_buffer = controller.get_debug_buffer()
            action = device_reset or service_restart
            debug_data = {'type': 'communication_recovery',
                          'info': {'controller': name, 'action': action},
                          'data': {'buffer': debug_buffer,
                                   'calls': {'timedout': calls_timedout,
                                             'succeeded': calls_succeeded}}}
            with open('/tmp/debug_{0}_{1}.json'.format(name, int(time.time())), 'w') as recovery_file:
                recovery_file.write(json.dumps(debug_data, indent=4, sort_keys=True))
            # Keep only the 9 most recent debug files for this controller.
            check_output("ls -tp /tmp/ | grep 'debug_{0}_.*json' | tail -n +10 | while read file; do rm -r /tmp/$file; done".format(name), shell=True)
        except Exception as ex:
            logger.exception('Could not store debug file: {0}'.format(ex))
    if service_restart is not None:
        last_service_restart = last_service_restart or {}
        attempts = last_service_restart.get('attempts', 0)
        if attempts < max_attempts:
            logger.critical('Major issues in communication with {0}. Restarting service...'.format(name))
            recovery_data['service_restart'] = {'reason': service_restart,
                                                'time': time.time(),
                                                'attempts': attempts + 1,
                                                'backoff': backoff}
            Config.set_entry(recovery_data_key, recovery_data)
            return 'service'
        else:
            logger.critical('Unable to recover issues in communication with {0}'.format(name))
    if device_reset is not None:
        last_device_reset = last_device_reset or {}
        attempts = last_device_reset.get('attempts', 0)
        if attempts < max_attempts:
            logger.critical('Major issues in communication with {0}. Resetting {0}'.format(name))
            recovery_data['device_reset'] = {'reason': device_reset,
                                             'time': time.time(),
                                             'attempts': attempts + 1,
                                             'backoff': backoff}
            Config.set_entry(recovery_data_key, recovery_data)
            return 'device'
        else:
            logger.critical('Unable to recover issues in communication with {0}'.format(name))
    return None