def test_service_handlers(self):
    """
    Test if we can calculate metrics
    """
    self.client.login(username=self.user, password=self.passwd)
    for idx, _l in enumerate(Layer.objects.all()):
        for inum in range(0, idx + 1):
            self.client.get(reverse('layer_detail', args=(_l.alternate,)),
                            **{"HTTP_USER_AGENT": self.ua})
    requests = RequestEvent.objects.all()

    c = CollectorAPI()
    q = requests.order_by('created')
    # process_requests() takes the window as (valid_from, valid_to),
    # i.e. oldest timestamp first
    c.process_requests(self.service, requests,
                       q.first().created, q.last().created)

    interval = self.service.check_interval
    now = datetime.utcnow().replace(tzinfo=pytz.utc)

    valid_from = now - (2 * interval)
    valid_to = now

    self.assertTrue(isinstance(valid_from, datetime))
    self.assertTrue(isinstance(valid_to, datetime))
    self.assertTrue(isinstance(interval, timedelta))
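# A condensed sketch of the collection flow exercised by the test above,
# assuming a configured monitoring Service and some logged RequestEvent
# rows (names taken from these snippets; illustrative, not a full test):
from datetime import datetime

import pytz
from geonode.monitoring.collector import CollectorAPI
from geonode.monitoring.models import RequestEvent


def collect_window(service):
    requests = RequestEvent.objects.all().order_by('created')
    if not requests.exists():
        return None
    collector = CollectorAPI()
    # aggregate raw request events into MetricValue rows for this service
    collector.process_requests(service, requests,
                               requests.first().created,
                               requests.last().created)
    now = datetime.utcnow().replace(tzinfo=pytz.utc)
    # query one metric back over the last two check intervals
    return collector.get_metrics_for(metric_name='request.ip',
                                     valid_from=now - 2 * service.check_interval,
                                     valid_to=now,
                                     interval=service.check_interval)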
def test_metric_data_endpoints(self):
    """
    Test GeoNode collect metrics
    """
    # Login (optional)
    self.client.force_login(self.u)
    self.assertTrue(get_user(self.client).is_authenticated())
    _l = Layer.objects.all().first()
    # Event
    self.client.get(reverse('layer_detail', args=(_l.alternate,)),
                    **{"HTTP_USER_AGENT": self.ua})
    requests = RequestEvent.objects.all()
    self.assertTrue(requests.count() > 0)
    # First check for MetricValue table
    self.assertTrue(MetricValue.objects.all().count() == 0)
    # Metric data collection
    collector = CollectorAPI()
    q = requests.order_by('created')
    collector.process_requests(self.service, requests,
                               q.first().created, q.last().created)
    # Second check for MetricValue table
    self.assertTrue(MetricValue.objects.all().count() >= 0)
    # Call endpoint
    url = "%s?%s" % (
        reverse('monitoring:api_metric_data', args={'request.users'}),
        'last=86400&interval=86400&event_type=view&resource_type=layer')
    response = self.client.get(url)  # noqa
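# Illustrative follow-up for the endpoint call above. The exact payload
# layout is not shown in these snippets, so only the envelope is checked;
# the top-level "data" key is an assumption based on the other monitoring
# API views in this codebase:
import json

response = self.client.get(url)
self.assertEqual(response.status_code, 200)
payload = json.loads(response.content)
self.assertIn('data', payload)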
def get(self, request, *args, **kwargs):
    capi = CollectorAPI()
    checks = capi.get_notifications()
    data = {'status': 'ok', 'success': True, 'data': {}}
    d = data['data']
    d['problems'] = problems = []
    d['health_level'] = 'ok'
    _levels = (
        'fatal',
        'error',
        'warning',
    )
    levels = set()
    for nc, ncdata in checks:
        for ncd in ncdata:
            levels.add(ncd.severity)
            problems.append(dump(ncd, self.fields))
    if levels:
        for lyr in _levels:
            if lyr in levels:
                d['health_level'] = lyr
                break
    return json_response(data)
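# Shape of the JSON this view produces, reconstructed from the code above.
# The contents of each problem entry depend on dump() and self.fields, so
# the inner dict is only indicative:
example = {
    "status": "ok",
    "success": True,
    "data": {
        "health_level": "warning",    # worst severity seen: fatal > error > warning
        "problems": [
            {"severity": "warning"},  # plus whatever fields dump() serializes
        ],
    },
}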
def test_service_handlers(self):
    """
    Test if we can calculate metrics
    """
    self.client.force_login(self.u)
    self.assertTrue(get_user(self.client).is_authenticated())
    _l = file_upload(
        os.path.join(gisdata.VECTOR_DATA, "san_andres_y_providencia_poi.shp"),
        name="san_andres_y_providencia_poi",
        user=self.u,
        overwrite=True,
    )
    for idx, _l in enumerate(Layer.objects.all()):
        for inum in range(0, idx + 1):
            self.client.get(reverse('layer_detail', args=(_l.alternate,)),
                            **{"HTTP_USER_AGENT": self.ua})
    # Works only with Postgres
    requests = RequestEvent.objects.all()

    c = CollectorAPI()
    q = requests.order_by('created')
    c.process_requests(self.service, requests,
                       q.first().created, q.last().created)

    interval = self.service.check_interval
    now = datetime.utcnow().replace(tzinfo=pytz.utc)

    valid_from = now - (2 * interval)
    valid_to = now

    self.assertTrue(isinstance(valid_from, datetime))
    self.assertTrue(isinstance(valid_to, datetime))
    self.assertTrue(isinstance(interval, timedelta))

    # Works only with Postgres
    metrics = c.get_metrics_for(metric_name='request.ip',
                                valid_from=valid_from,
                                valid_to=valid_to,
                                interval=interval)
    self.assertIsNotNone(metrics)
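# How the structure returned by get_metrics_for() is typically consumed,
# mirroring the show_metrics() management command further below (a sketch
# continuing the test context above):
data = c.get_metrics_for(metric_name='request.ip',
                         valid_from=valid_from,
                         valid_to=valid_to,
                         interval=interval)
for row in data['data']:
    # each row covers one aggregation interval; row['data'] holds the values
    val = row['data'][0]['val'] if row['data'] else None
    print(row['valid_to'], '->', val)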
def handle(self, *args, **options):
    oservice = options['service']
    if not oservice:
        services = Service.objects.all()
    else:
        services = [oservice]
    if options['list_services']:
        print('available services')
        for s in services:
            print(' ', s.name, '(', s.url, ')')
            print(' type', s.service_type.name)
            print(' running on', s.host.name, s.host.ip)
            print(' active:', s.active)
            if s.last_check:
                print(' last check:', s.last_check)
            else:
                print(' not checked yet')
            print(' ')
        return
    c = CollectorAPI()
    for s in services:
        try:
            self.run_check(s,
                           collector=c,
                           since=options['since'],
                           until=options['until'],
                           force_check=options['force_check'],
                           format=options['format'])
        except Exception as err:
            log.error("Cannot collect from %s: %s", s, err, exc_info=err)
            if options['halt_on_errors']:
                raise
    if not options['do_not_clear']:
        log.info("Clearing old data")
        c.clear_old_data()
    if options['emit_notifications']:
        log.info("Processing notifications for %s", options['until'])
        s = Service.objects.first()
        interval = s.check_interval
        now = datetime.utcnow().replace(tzinfo=pytz.utc)
        notifications_check = now - interval
        c.emit_notifications()  # notifications_check)
def handle(self, *args, **kwargs):
    # Exit early if MONITORING_ENABLED=False
    if not settings.MONITORING_ENABLED:
        return
    c = CollectorAPI()
    c.aggregate_past_periods()
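# The same aggregation can be triggered outside the management command,
# e.g. from a Django shell, since it only needs a CollectorAPI instance
# (assumes the monitoring app is installed and migrated):
from geonode.monitoring.collector import CollectorAPI

CollectorAPI().aggregate_past_periods()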
from geonode.monitoring.models import (
    Metric, ServiceTypeMetric, MetricLabel, MonitoredResource,
    ExceptionEvent, EventType, NotificationCheck, MetricNotificationCheck,
)
from geonode.monitoring.models import do_autoconfigure
from geonode.monitoring.utils import TypeChecks, dump, extend_datetime_input_formats
from geonode.monitoring.service_handlers import exposes

# Create your views here.

capi = CollectorAPI()


class MetricsList(View):

    def get(self, *args, **kwargs):
        _metrics = capi.get_metric_names()
        out = []
        for srv, mlist in _metrics:
            out.append({
                'service': srv.name,
                'metrics': [{
                    'name': m.name,
                    'unit': m.unit,
                    'type': m.type
                } for m in mlist]
            })
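# The response body assembled by MetricsList.get(), based on the loop above.
# The unit/type values vary per metric, so this shows only the shape:
example_out = [
    {
        "service": "geonode",
        "metrics": [
            {"name": "request.count", "unit": None, "type": "count"},
            {"name": "response.time", "unit": "ms", "type": "value"},
        ],
    },
]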
def test_notifications_api(self):
    capi = CollectorAPI()
    start = datetime.utcnow().replace(tzinfo=pytz.utc)

    start_aligned = align_period_start(start, self.service.check_interval)
    end_aligned = start_aligned + self.service.check_interval

    # for (metric_name, field_opt, use_service,
    #      use_resource, use_label, use_ows_service,
    #      minimum, maximum, thresholds,) in thresholds:

    notifications_config = (
        'geonode is not working',
        'detects when requests are not handled',
        (('request.count', 'min_value', False, False, False, False,
          0, 10, None, 'Number of handled requests is lower than',),
         ('response.time', 'max_value', False, False, False, False,
          500, None, None, 'Response time is higher than',),))
    nc = NotificationCheck.create(*notifications_config)
    self.assertTrue(nc.definitions.all().count() == 2)

    user = self.u2
    pwd = self.passwd2
    self.client.login(username=user.username, password=pwd)
    for nc in NotificationCheck.objects.all():
        notifications_config_url = reverse(
            'monitoring:api_user_notification_config', args=(nc.id,))
        nc_form = nc.get_user_form()
        self.assertTrue(nc_form)
        self.assertTrue(nc_form.fields.keys())
        vals = [1000000, 100000]
        data = {'emails': []}
        data['emails'] = '\n'.join(data['emails'])
        idx = 0
        for fname, field in nc_form.fields.items():
            if fname in self.reserved_fields:
                continue
            data[fname] = vals[idx]
            idx += 1
        resp = self.client.post(notifications_config_url, data)
        self.assertEqual(resp.status_code, 400)

        vals = [7, 600]
        data = {'emails': '\n'.join(
            [self.u.email, self.u2.email, '*****@*****.**'])}
        idx = 0
        for fname, field in nc_form.fields.items():
            if fname in self.reserved_fields:
                continue
            data[fname] = vals[idx]
            idx += 1
        # data['emails'] = '\n'.join(data['emails'])
        resp = self.client.post(notifications_config_url, data)
        nc.refresh_from_db()
        self.assertEqual(resp.status_code, 200, resp)

        _emails = data['emails'].split('\n')[-1:]
        _users = data['emails'].split('\n')[:-1]
        self.assertEqual(
            set([u.email for u in nc.get_users()]), set(_users))
        self.assertEqual(
            set([email for email in nc.get_emails()]), set(_emails))

    metric_rq_count = Metric.objects.get(name='request.count')
    metric_rq_time = Metric.objects.get(name='response.time')

    MetricValue.add(metric_rq_count, start_aligned, end_aligned,
                    self.service, label="Count",
                    value_raw=0, value_num=0, value=0)
    MetricValue.add(metric_rq_time, start_aligned, end_aligned,
                    self.service, label="Count",
                    value_raw=700, value_num=700, value=700)

    nc = NotificationCheck.objects.get()
    self.assertTrue(len(nc.get_emails()) > 0)
    self.assertTrue(len(nc.get_users()) > 0)
    self.assertEqual(nc.last_send, None)
    self.assertTrue(nc.can_send)

    self.assertEqual(len(mail.outbox), 0)

    # make sure inactive will not trigger anything
    nc.active = False
    nc.save()
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), 0)

    nc.active = True
    nc.save()
    capi.emit_notifications(start)
    self.assertTrue(nc.receivers.all().count() > 0)
    self.assertEqual(len(mail.outbox), nc.receivers.all().count())
    nc.refresh_from_db()

    notifications_url = reverse('monitoring:api_user_notifications')
    nresp = self.client.get(notifications_url)
    self.assertEqual(nresp.status_code, 200)
    ndata = json.loads(nresp.content)
    self.assertEqual(
        set([n['id'] for n in ndata['data']]),
        set(NotificationCheck.objects.all().values_list('id', flat=True)))

    self.assertTrue(isinstance(nc.last_send, datetime))
    self.assertFalse(nc.can_send)
    mail.outbox = []
    self.assertEqual(len(mail.outbox), 0)
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), 0)

    nc.last_send = start - nc.grace_period
    nc.save()
    self.assertTrue(nc.can_send)
    mail.outbox = []
    self.assertEqual(len(mail.outbox), 0)
    capi.emit_notifications(start)
    self.assertEqual(len(mail.outbox), nc.receivers.all().count())
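# The positional layout of each definition tuple passed to
# NotificationCheck.create(), as documented by the commented header in the
# test above (the trailing string is the human-readable description):
definition = (
    'request.count',  # metric_name
    'min_value',      # field_opt
    False,            # use_service
    False,            # use_resource
    False,            # use_label
    False,            # use_ows_service
    0,                # minimum
    10,               # maximum
    None,             # thresholds
    'Number of handled requests is lower than',
)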
class LogstashDispatcher(object):
    """
    Dispatcher of GeoNode metric data for Logstash server
    """

    def __init__(self):
        self._centralized_server = None
        self._logger = None
        self._handler = None
        self._interval = 0
        self._collector = None
        self._init_server()

    def _init_server(self):
        """
        Initializing Dispatcher with basic information
        :return: None
        """
        # self.manage_task()
        if IS_ENABLED:
            self._centralized_server = self._get_centralized_server()
            if self._centralized_server:
                # self._centralized_server.sync_periodic_task()
                host = self._centralized_server.host
                port = self._centralized_server.port
                db_path = self._centralized_server.db_path \
                    if self._centralized_server.db_path else None
                self._logger = logging.getLogger('geonode-logstash-logger')
                self._logger.setLevel(logging.INFO)
                self._handler = GeonodeAsynchronousLogstashHandler(
                    host, port, database_path=db_path,
                    transport=GeonodeTcpTransport)
                self._logger.addHandler(self._handler)
                # self.client_ip = socket.gethostbyname(socket.gethostname())
                self.client_ip = self._centralized_server.local_ip
                self._collector = CollectorAPI()
                self._set_time_range()
        else:
            log.error(
                "Monitoring/analytics disabled, centralized server cannot be set up.")

    # @staticmethod
    # def manage_task():
    #     """
    #     Disable celery task
    #     """
    #     pts = PeriodicTask.objects.filter(
    #         name="dispatch-metrics-task",
    #         task="geonode_logstash.tasks.dispatch_metrics",
    #     )
    #     for pt in pts:
    #         pt.enabled = IS_ENABLED
    #         pt.save()

    @staticmethod
    def _get_centralized_server():
        """
        Get the Centralized Server instance
        :return: Centralized Server
        """
        try:
            cs = CentralizedServer.objects.first()
            return cs
        except CentralizedServer.DoesNotExist:
            log.error("Centralized server not found.")
        except Exception:
            pass

    @staticmethod
    def get_socket_timeout():
        """
        Configuring the SOCKET_TIMEOUT from the model
        :return: SOCKET_TIMEOUT
        """
        cs = LogstashDispatcher._get_centralized_server()
        if cs and cs.socket_timeout is not None:
            log.debug(" ---------------------- socket_timeout %s " % cs.socket_timeout)
            return cs.socket_timeout
        else:
            return constants.SOCKET_TIMEOUT

    @staticmethod
    def get_queue_check_interval():
        """
        Configuring the QUEUE_CHECK_INTERVAL from the model
        :return: QUEUE_CHECK_INTERVAL
        """
        cs = LogstashDispatcher._get_centralized_server()
        if cs and cs.queue_check_interval is not None:
            return cs.queue_check_interval
        else:
            return constants.QUEUE_CHECK_INTERVAL

    @staticmethod
    def get_queue_events_flush_interval():
        """
        Configuring the QUEUED_EVENTS_FLUSH_INTERVAL from the model
        :return: QUEUED_EVENTS_FLUSH_INTERVAL
        """
        cs = LogstashDispatcher._get_centralized_server()
        if cs and cs.queue_events_flush_interval is not None:
            return cs.queue_events_flush_interval
        else:
            return constants.QUEUED_EVENTS_FLUSH_INTERVAL

    @staticmethod
    def get_queue_events_flush_count():
        """
        Configuring the QUEUED_EVENTS_FLUSH_COUNT from the model
        :return: QUEUED_EVENTS_FLUSH_COUNT
        """
        cs = LogstashDispatcher._get_centralized_server()
        if cs and cs.queue_events_flush_count is not None:
            return cs.queue_events_flush_count
        else:
            return constants.QUEUED_EVENTS_FLUSH_COUNT

    @staticmethod
    def get_queue_events_batch_size():
        """
        Configuring the QUEUED_EVENTS_BATCH_SIZE from the model
        :return: QUEUED_EVENTS_BATCH_SIZE
        """
        cs = LogstashDispatcher._get_centralized_server()
        if cs and cs.queue_events_batch_size is not None:
            return cs.queue_events_batch_size
        else:
            return constants.QUEUED_EVENTS_BATCH_SIZE

    @staticmethod
    def get_logstash_db_timeout():
        """
        Configuring the DATABASE_TIMEOUT from the model
        :return: DATABASE_TIMEOUT
        """
        cs = LogstashDispatcher._get_centralized_server()
        if cs and cs.logstash_db_timeout is not None:
            return cs.logstash_db_timeout
        else:
            return constants.DATABASE_TIMEOUT

    def dispatch_metrics(self):
        """
        Sending the messages
        :return: None
        """
        if self._centralized_server:
            if IS_ENABLED:
                for data_type in DATA_TYPES_MAP:
                    try:
                        msg = self._get_message(data_type)
                        print(" ------------------------------------------------ ")
                        print(msg)
                        if msg:
                            self._logger.info(msg)
                            time.sleep(LogstashDispatcher.get_socket_timeout())
                    except Exception as e:
                        # Note: it catches exceptions on current thread only
                        traceback.print_exc()
                        log.error("Sending data failed: " + str(e))
                # Updating CentralizedServer
                self._update_server()
            else:
                log.error(
                    "Monitoring/analytics disabled, centralized server cannot be set up.")
        else:
            log.error("Centralized server not found.")

    def _update_server(self):
        """
        Updating the CentralizedServer instance
        :return: None
        """
        # We have to wait for db to be updated
        time.sleep(LogstashDispatcher.get_logstash_db_timeout())
        # We have to retrieve the "entry_date" of the last event in queue
        last_event_date = self._handler.get_last_entry_date()
        if last_event_date:
            date_time_obj = datetime.strptime(last_event_date, '%Y-%m-%d %H:%M:%S')
            self._centralized_server.last_failed_deliver = pytz.utc.localize(date_time_obj)
        else:
            # If no events in queue then it means all events have been flushed
            self._centralized_server.last_successful_deliver = self._valid_to
            self._centralized_server.last_failed_deliver = None
        self._centralized_server.next_scheduled_deliver = self._valid_to + timedelta(
            seconds=self._centralized_server.interval)
        self._centralized_server.save()

    def _set_time_range(self):
        """
        Set up the time range as valid_to/valid_from and interval
        :return: None
        """
        self._valid_to = datetime.utcnow().replace(tzinfo=pytz.utc)
        self._valid_from = self._valid_to - timedelta(
            seconds=self._centralized_server.interval)
        self._valid_from = self._valid_from.replace(tzinfo=pytz.utc)
        self._interval = (self._valid_to - self._valid_from).total_seconds()

    def _get_message(self, data_type):
        """
        Retrieving data querying the MetricValue model
        :param data_type: field mapping to keep only interesting information
        :return: data dictionary
        """
        has_data = False
        # Name of the object read by logstash filter (not used in case of "overview")
        data_name = data_type["name"]
        # Define data HEADER
        data = {
            "format_version": "1.0",
            "data_type": data_name,
            "instance": {
                "name": settings.HOSTNAME,
                "ip": self.client_ip
            },
            "time": {
                "startTime": self._valid_from.isoformat(),
                "endTime": self._valid_to.isoformat()
            }
        }
        # List data container (not used in case of "overview")
        list_data = []
        # For each metric we want to execute a query
        for metric in data_type["metrics"]:
            # Name omitted in hooks when retrieving no-list data (es. "overview")
            is_list = "name" in metric["hooks"]
            group_by = metric["params"]["group_by"] \
                if "params" in metric and "group_by" in metric["params"] \
                else None
            event_type = EventType.get(metric["params"]["event_type"]) \
                if "params" in metric and "event_type" in metric["params"] \
                else None
            # Retrieving data through the CollectorAPI object
            metrics_data = self._collector.get_metrics_data(
                metric_name=metric["name"],
                valid_from=self._valid_from,
                valid_to=self._valid_to,
                interval=self._interval,
                event_type=event_type,
                group_by=group_by)
            if metrics_data:
                # data dictionary updating
                for item in metrics_data:
                    if is_list:
                        name_value = self._build_data(item, metric["hooks"]["name"])
                    item_value = {
                        k: self._build_data(item, v)
                        for k, v in metric["hooks"].items()
                    }
                    if "countries" == data_name:
                        try:
                            country_iso_3 = pycountry.countries.get(
                                alpha_3=name_value).alpha_3
                            center = self._get_country_center(country_iso_3)
                            item_value['center'] = center or ''
                        except Exception as e:
                            log.error(str(e))
                    if is_list:
                        try:
                            list_item = filter(
                                lambda l_item: l_item["name"] == name_value,
                                list_data)
                            _idx = list(list_item)[0]
                            i = list_data.index(_idx)
                            list_data[i].update(item_value)
                        except (IndexError, ValueError):
                            list_data.append(item_value)
                    else:
                        data.update(item_value)
                        has_data = True
        if list_data:
            data.update({data_name: list_data})
            has_data = True
        if "extra" in data_type:
            for extra in data_type["extra"]:
                # For each "extra" entry we have to define a "_get_{extra}" method
                data.update({extra: getattr(self, '_get_{}'.format(extra))()})
                has_data = True
        return data if has_data else None

    @staticmethod
    def _build_data(item, key):
        """
        Extract interesting data from the query result
        :param item: query result item
        :param key: interesting key
        :return: interesting value
        """
        data = ""
        try:
            data = int(item[key])
        except KeyError:
            if "." in key:
                k, v = key.split(".")
                data = item[k][v]
            else:
                raise Exception(
                    "DATA_TYPES_MAP not valid for item {}.".format(str(item)))
        except ValueError:
            data = item[key]
        return data

    @staticmethod
    def _get_registered_users():
        """
        Retrieving the users currently registered in GeoNode
        :return: users count
        """
        User = get_user_model()
        return User.objects.count()

    @staticmethod
    def _get_layers():
        """
        Retrieving all the existing layers
        :return: layers count
        """
        return Layer.objects.count()

    @staticmethod
    def _get_maps():
        """
        Retrieving all the existing maps
        :return: maps count
        """
        return Map.objects.count()

    @staticmethod
    def _get_documents():
        """
        Retrieving all the existing documents
        :return: documents count
        """
        return Document.objects.count()

    def _get_errors(self):
        """
        Retrieving errors
        :return: errors count
        """
        return ExceptionsListView().get_queryset(
            valid_to=self._valid_to,
            valid_from=self._valid_from,
            interval=self._interval).count()

    @staticmethod
    def _get_country_center(iso_3):
        output = None
        for _cnt in COUNTRIES_GEODB:
            if iso_3 == _cnt['country.iso_3']:
                output = [float(i) for i in _cnt['country.center']]
                break
        return output

    def test_dispatch(self, host=None, port=None):
        """
        Testing connection to the centralized server
        :return: None
        """
        if self._centralized_server:
            test_msg = {
                "format_version": "1.0",
                "instance": {
                    "name": settings.HOSTNAME,
                    "ip": self.client_ip,
                },
                "test": "test"
            }
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(constants.SOCKET_TIMEOUT)
            test_host = host if host else self._centralized_server.host
            test_port = int(port) if port else self._centralized_server.port
            sock.connect((test_host, test_port))
            compressed_msg = self._handler.formatter.json_gzip(test_msg)
            sock.sendall(compressed_msg)
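# A minimal usage sketch for the dispatcher above, assuming a
# CentralizedServer row exists and IS_ENABLED is True in this module:
dispatcher = LogstashDispatcher()
# verify connectivity first (raises on socket errors)
dispatcher.test_dispatch()
# then push the metric messages for the configured time window
dispatcher.dispatch_metrics()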
def collect_metric(**options):
    from geonode.celery_app import app
    from geonode.tasks.tasks import memcache_lock
    from geonode.monitoring.models import Service
    from geonode.monitoring.collector import CollectorAPI

    _start_time = None
    _end_time = None
    # The cache key consists of the task name and the MD5 digest
    # of the name.
    name = b'collect_metric'
    hexdigest = md5(name).hexdigest()
    lock_id = '{0}-lock-{1}'.format(name, hexdigest)
    _start_time = datetime.utcnow().isoformat()
    log.info('[{}] Collecting Metrics - started @ {}'.format(
        lock_id, _start_time))
    with memcache_lock(lock_id, app.oid) as acquired:
        if acquired:
            oservice = options['service']
            if not oservice:
                services = Service.objects.all()
            else:
                services = [oservice]
            if options['list_services']:
                print('available services')
                for s in services:
                    print(' ', s.name, '(', s.url, ')')
                    print(' type', s.service_type.name)
                    print(' running on', s.host.name, s.host.ip)
                    print(' active:', s.active)
                    if s.last_check:
                        print(' last check:', s.last_check)
                    else:
                        print(' not checked yet')
                    print(' ')
                return
            c = CollectorAPI()
            for s in services:
                try:
                    run_check(s,
                              collector=c,
                              since=options['since'],
                              until=options['until'],
                              force_check=options['force_check'],
                              format=options['format'])
                except Exception as err:
                    log.error("Cannot collect from %s: %s", s, err, exc_info=err)
                    if options['halt_on_errors']:
                        raise
            if not options['do_not_clear']:
                log.debug("Clearing old data")
                c.clear_old_data()
            if options['emit_notifications']:
                log.debug("Processing notifications for %s", options['until'])
                # s = Service.objects.first()
                # interval = s.check_interval
                # now = datetime.utcnow().replace(tzinfo=pytz.utc)
                # notifications_check = now - interval
                c.emit_notifications()  # notifications_check))
    _end_time = datetime.utcnow().isoformat()
    log.info('[{}] Collecting Metrics - finished @ {}'.format(
        lock_id, _end_time))
    return (_start_time, _end_time)
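# The options this task expects, shown as a direct call. The keys are
# inferred from the lookups inside collect_metric above; the values here
# are only illustrative:
collect_metric(
    service=None,            # None means "all registered Service rows"
    list_services=False,
    since=None,
    until=None,
    force_check=False,
    format=None,
    halt_on_errors=False,
    do_not_clear=False,
    emit_notifications=True,
)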
class Command(BaseCommand):
    """
    Run collecting for monitoring
    """

    def add_arguments(self, parser):
        parser.add_argument('-m', '--list-metrics', dest='list_metrics',
                            action='store_true', default=False,
                            help=_("Show list of metrics"))
        parser.add_argument('-l', '--list-labels', dest='list_labels',
                            action='store_true', default=False,
                            help=_("Show list of labels for metric"))
        parser.add_argument('-r', '--list-resources', dest='list_resources',
                            action='store_true', default=False,
                            help=_("Show list of resources for metric"))
        parser.add_argument('-s', '--since', dest='since', default=None,
                            type=parse_datetime,
                            help=_("Process data since specific timestamp "
                                   "(YYYY-MM-DD HH:MM:SS format). "
                                   "If not provided, last sync will be used."))
        parser.add_argument('-u', '--until', dest='until', default=None,
                            type=parse_datetime,
                            help=_("Process data until specific timestamp "
                                   "(YYYY-MM-DD HH:MM:SS format). "
                                   "If not provided, now will be used."))
        parser.add_argument('-i', '--interval', dest='interval', default=60,
                            type=int,
                            help=_("Data aggregation interval in seconds (default: 60)"))
        parser.add_argument('metric_name', default=None, nargs="?",
                            help=_("Metric name"))
        parser.add_argument('-rr', '--for-resource', dest='resource',
                            type=TypeChecks.resource_type,
                            help=_("Show data for specific resource in "
                                   "resource_type=resource_name format"))
        parser.add_argument('-ss', '--service', dest='service',
                            type=TypeChecks.service_type,
                            help=_("Show data for specific resource"))
        parser.add_argument('-ll', '--label', dest='label',
                            type=TypeChecks.label_type,
                            help=_("Show data for specific label"))

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def handle(self, *args, **options):
        self.collector = CollectorAPI()
        if options['list_metrics']:
            self.list_metrics()
            return
        interval = timedelta(seconds=options['interval'])
        metric_names = options['metric_name']
        resource = options['resource']
        service = options['service']
        label = options['label']
        if not metric_names:
            raise CommandError("No metric name")
        if isinstance(metric_names, str):  # was types.StringTypes (Python 2 only)
            metric_names = [metric_names]
        for m in metric_names:
            # def get_metrics_for(self, metric_name, valid_from=None, valid_to=None,
            #                     interval=None, service=None, label=None, resource=None):
            if options['list_labels']:
                self.list_labels(m)
            elif options['list_resources']:
                self.list_resources(m)
            else:
                self.show_metrics(m, options['since'], options['until'],
                                  interval, resource=resource, label=label)

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def list_labels(self, metric, resource=None):
        labels = self.collector.get_labels_for_metric(metric, resource=resource)
        print('Labels for metric {}'.format(metric))
        for label in labels:
            print(' ', *label)

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def list_resources(self, metric):
        resources = self.collector.get_resources_for_metric(metric)
        print('Resources for metric {}'.format(metric))
        for res in resources:
            print(' ', '='.join(res))

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def show_metrics(self, metric, since, until, interval,
                     resource=None, label=None, service=None):
        print('Monitoring Metric values for {}'.format(metric))
        if service:
            print(' for service: {} '.format(service))
        if resource:
            print(' for resource: {}={} '.format(resource.type, resource.name))
        if label:
            print(' for label: {} label'.format(label.name))
        utc = pytz.utc
        now = datetime.utcnow().replace(tzinfo=utc)
        since = since.replace(tzinfo=utc) if since else None
        until = until.replace(tzinfo=utc) if until else None
        data = self.collector.get_metrics_for(metric,
                                              valid_from=since,
                                              valid_to=until,
                                              interval=interval,
                                              resource=resource,
                                              label=label,
                                              service=service)
        print(' since {} until {}\n'.format(
            data['input_valid_from'].strftime(TIMESTAMP_OUTPUT),
            data['input_valid_to'].strftime(TIMESTAMP_OUTPUT)))
        for row in data['data']:
            val = None
            if row['data']:
                val = row['data'][0]['val']
            print(' ', row['valid_to'].strftime(TIMESTAMP_OUTPUT),
                  '->', '' if not val else val)

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def list_metrics(self):
        _metrics = self.collector.get_metric_names()
        for stype, metrics in _metrics:
            print('service type', stype.name)
            for m in metrics:
                print(' {}[{}]'.format(m.name, m.type))
class Command(BaseCommand):
    """
    Run collecting for monitoring
    """

    def add_arguments(self, parser):
        parser.add_argument('-m', '--list-metrics', dest='list_metrics',
                            action='store_true', default=False,
                            help=_("Show list of metrics"))
        parser.add_argument('-l', '--list-labels', dest='list_labels',
                            action='store_true', default=False,
                            help=_("Show list of labels for metric"))
        parser.add_argument('-r', '--list-resources', dest='list_resources',
                            action='store_true', default=False,
                            help=_("Show list of resources for metric"))
        parser.add_argument(
            '-s', '--since', dest='since', default=None,
            type=parse_datetime,
            help=_("Process data since specific timestamp (YYYY-MM-DD HH:MM:SS format). "
                   "If not provided, last sync will be used."))
        parser.add_argument(
            '-u', '--until', dest='until', default=None,
            type=parse_datetime,
            help=_("Process data until specific timestamp (YYYY-MM-DD HH:MM:SS format). "
                   "If not provided, now will be used."))
        parser.add_argument(
            '-i', '--interval', dest='interval', default=60, type=int,
            help=_("Data aggregation interval in seconds (default: 60)"))
        parser.add_argument('metric_name', default=None, nargs="?",
                            help=_("Metric name"))
        parser.add_argument(
            '-rr', '--for-resource', dest='resource',
            type=TypeChecks.resource_type,
            help=_("Show data for specific resource in resource_type=resource_name format"))
        parser.add_argument('-ss', '--service', dest='service',
                            type=TypeChecks.service_type,
                            help=_("Show data for specific resource"))
        parser.add_argument('-ll', '--label', dest='label',
                            type=TypeChecks.label_type,
                            help=_("Show data for specific label"))

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def handle(self, *args, **options):
        # Exit early if MONITORING_ENABLED=False
        if not settings.MONITORING_ENABLED:
            return
        self.collector = CollectorAPI()
        if options['list_metrics']:
            self.list_metrics()
            return
        interval = timedelta(seconds=options['interval'])
        metric_names = options['metric_name']
        resource = options['resource']
        # service = options['service']
        label = options['label']
        if not metric_names:
            raise CommandError("No metric name")
        if isinstance(metric_names, str):
            metric_names = [metric_names]
        for m in metric_names:
            if options['list_labels']:
                self.list_labels(m)
            elif options['list_resources']:
                self.list_resources(m)
            else:
                self.show_metrics(m, options['since'], options['until'],
                                  interval, resource=resource, label=label)

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def list_labels(self, metric, resource=None):
        labels = self.collector.get_labels_for_metric(metric, resource=resource)
        print(f'Labels for metric {metric}')
        for label in labels:
            print(' ', *label)

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def list_resources(self, metric):
        resources = self.collector.get_resources_for_metric(metric)
        print(f'Resources for metric {metric}')
        for res in resources:
            print(' ', '='.join(res))

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def show_metrics(self, metric, since, until, interval,
                     resource=None, label=None, service=None):
        print(f'Monitoring Metric values for {metric}')
        if service:
            print(f' for service: {service} ')
        if resource:
            print(f' for resource: {resource.type}={resource.name} ')
        if label:
            print(f' for label: {label.name} label')
        utc = pytz.utc
        since = since.replace(tzinfo=utc) if since else None
        until = until.replace(tzinfo=utc) if until else None
        data = self.collector.get_metrics_for(metric,
                                              valid_from=since,
                                              valid_to=until,
                                              interval=interval,
                                              resource=resource,
                                              label=label,
                                              service=service)
        print(f" since {data['input_valid_from'].strftime(TIMESTAMP_OUTPUT)} "
              f"until {data['input_valid_to'].strftime(TIMESTAMP_OUTPUT)}\n")
        for row in data['data']:
            val = None
            if row['data']:
                val = row['data'][0]['val']
            print(' ', row['valid_to'].strftime(TIMESTAMP_OUTPUT),
                  '->', '' if not val else val)

    @timeout_decorator.timeout(LOCAL_TIMEOUT)
    def list_metrics(self):
        _metrics = self.collector.get_metric_names()
        for stype, metrics in _metrics:
            print('service type', stype.name)
            for m in metrics:
                print(f' {m.name}[{m.type}]')
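# Typical invocations of this command. The management command's file name
# is not shown in these snippets, so "show_metrics" below is a hypothetical
# name; the flags themselves come from add_arguments() above:
#
#   python manage.py show_metrics -m                      # list metric names
#   python manage.py show_metrics request.count -i 3600   # hourly aggregates
#   python manage.py show_metrics request.count -l        # labels for a metric
#   python manage.py show_metrics request.count \
#       -s "2021-01-01 00:00:00" -u "2021-01-02 00:00:00" \
#       -rr layer=geonode:roads -ll admin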
def collect_metric(**options):
    from geonode.tasks.tasks import memcache_lock
    from geonode.monitoring.models import Service
    from geonode.monitoring.collector import CollectorAPI

    _start_time = None
    _end_time = None
    # The cache key consists of the task name and the MD5 digest
    # of the name.
    name = b'collect_metric'
    hexdigest = md5(name).hexdigest()
    lock_id = f'{name.decode()}-lock-{hexdigest}'
    _start_time = _end_time = datetime.utcnow().isoformat()
    log.info('[{}] Collecting Metrics - started @ {}'.format(
        lock_id, _start_time))
    lock = memcache_lock(lock_id)
    if lock.acquire(blocking=False) is True:
        log.info('[{}] Collecting Metrics - [...acquired lock] @ {}'.format(
            lock_id, _start_time))
        try:
            oservice = options['service']
            if not oservice:
                services = Service.objects.all()
            else:
                services = [oservice]
            if options['list_services']:
                print('available services')
                for s in services:
                    print(' ', s.name, '(', s.url, ')')
                    print(' type', s.service_type.name)
                    print(' running on', s.host.name, s.host.ip)
                    print(' active:', s.active)
                    if s.last_check:
                        print(' last check:', s.last_check)
                    else:
                        print(' not checked yet')
                    print(' ')
                return
            c = CollectorAPI()
            for s in services:
                try:
                    run_check(s,
                              collector=c,
                              since=options['since'],
                              until=options['until'],
                              force_check=options['force_check'],
                              format=options['format'])
                except Exception as e:
                    log.warning(e)
            if not options['do_not_clear']:
                log.info("Clearing old data")
                c.clear_old_data()
            if options['emit_notifications']:
                log.info("Processing notifications for %s", options['until'])
                # s = Service.objects.first()
                # interval = s.check_interval
                # now = datetime.utcnow().replace(tzinfo=pytz.utc)
                # notifications_check = now - interval
                c.emit_notifications()  # notifications_check))
            _end_time = datetime.utcnow().isoformat()
            log.info('[{}] Collecting Metrics - finished @ {}'.format(
                lock_id, _end_time))
        except Exception as e:
            log.info('[{}] Collecting Metrics - errored @ {}'.format(
                lock_id, _end_time))
            log.exception(e)
        finally:
            lock.release()
    log.info('[{}] Collecting Metrics - exit @ {}'.format(lock_id, _end_time))
    return (_start_time, _end_time)
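# The non-blocking lock pattern used above, reduced to its skeleton: only
# one worker runs the body at a time, and the lock is always released.
# memcache_lock here is GeoNode's helper, per the import above:
lock = memcache_lock('collect_metric-lock')
if lock.acquire(blocking=False) is True:
    try:
        pass  # do the periodic work exactly once across workers
    finally:
        lock.release()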