def handle(self, service_name, **options):
    """Run service checks and report the outcome through the exit status.

    Args:
        service_name: Name of a single check to run. When falsy, every
            key of ``CHECKS`` is run instead.
        **options: Remaining command options; not used here.

    The process is terminated with ``sys.exit(-1)`` (after listing the
    names in ``CHECKS``) when ``run_checks`` raises
    ``UnknownCheckException``, and with ``sys.exit(1)`` when any check
    reports ``success`` as falsy, so that scripts invoking the command can
    detect a problem instead of always seeing exit status 0.
    """
    # Imported locally so the block does not rely on a module-level import.
    import sys

    checks_to_do = [service_name] if service_name else list(CHECKS)
    try:
        statuses = run_checks(checks_to_do)
    except UnknownCheckException:
        print("Services available are:")
        # Use a dedicated loop name so the ``service_name`` argument is not
        # overwritten while listing the alternatives.
        for available_name in CHECKS:
            print("- {}".format(available_name))
        sys.exit(-1)
    else:
        self.print_results(statuses)
        # Each entry pairs a check name with its status object; only the
        # status (index 1) is inspected here.
        if not all(entry[1].success for entry in statuses):
            sys.exit(1)
def server_up(req):
    """
    Health-check endpoint meant to be polled by server monitoring tools
    such as 'pingdom'.

    Responses:
        HttpResponse("success") when no selected check failed, or when
        checks failed while a deploy is in progress.
        HttpResponse(error_message, status=500) listing the failed checks
        otherwise.

    Selecting checks:
        serverup.txt                    runs every check flagged
                                        ``always_check``
        serverup.txt?only={check_name}  runs just that check; a name that is
                                        not in ``CHECKS`` falls back to the
                                        default selection
        serverup.txt?{check_name}       additionally runs a non-default
                                        check (currently only ``heartbeat``)
    """
    requested = req.GET.get('only')
    if requested and requested in CHECKS:
        checks_to_do = [requested]
    else:
        checks_to_do = []
        for name, info in CHECKS.items():
            # A check is selected when it is on by default or when its name
            # appears as a query parameter (with any value).
            if info['always_check'] or req.GET.get(name) is not None:
                checks_to_do.append(name)

    statuses = run_checks(checks_to_do)

    # Record the duration of every check, tagged with its outcome.
    for check_name, status in statuses:
        tags = {
            'status': 'ok' if status.success else 'failed',
            'check': check_name,
        }
        metrics_gauge(
            'commcare.serverup.check',
            status.duration,
            tags=tags,
            multiprocess_mode=MPM_MAX,
        )

    failures = [(name, outcome) for name, outcome in statuses if not outcome.success]

    # Nothing to report, or failures that are ignored during a deploy.
    if not failures or is_deploy_in_progress():
        return HttpResponse("success")

    paragraphs = []
    for name, outcome in failures:
        summary = '<strong>{}</strong>: {}'.format(name, html.escape(outcome.msg))
        paragraphs.append(html.linebreaks(summary.strip()))

    create_metrics_event(
        'Serverup check failed',
        '\n'.join(paragraphs),
        alert_type='error',
        aggregation_key='serverup',
    )

    # The response body starts with the host name reported by os.uname().
    heading = 'Failed Checks (%s):' % os.uname()[1]
    return HttpResponse(heading + ''.join(paragraphs), status=500)
def check_services(request):
    """Run every check in ``service_checks.CHECKS`` and render the results.

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        An ``HttpResponse`` whose body is a ``<pre>`` block with one
        ``<br/>``-terminated entry per check: outcome word, duration in
        seconds, check name and message.
    """
    # Stdlib escaping helper, imported locally so the block is self-contained.
    from html import escape

    def get_message(service_name, result):
        """Format one check result as a single line of HTML."""
        if result.exception:
            status = "EXCEPTION"
            msg = repr(result.exception)
        else:
            status = "SUCCESS" if result.success else "FAILURE"
            msg = result.msg
        # The message is plain text placed into HTML. Exception reprs in
        # particular tend to contain "<...>" fragments that a browser would
        # otherwise treat as tags and hide, so escape before embedding.
        return "{} (Took {:6.2f}s) {:15}: {}<br/>".format(
            status, result.duration, service_name, escape(str(msg)))

    statuses = run_checks(list(service_checks.CHECKS))
    results = [get_message(name, status) for name, status in statuses]
    return HttpResponse("<pre>" + "".join(results) + "</pre>")
def handle(self, service_name, **options):
    """Run the requested service check, or all of them, and set the exit status.

    Args:
        service_name: Name of one check to run; when falsy, every key of
            ``CHECKS`` is run.
        **options: Remaining command options; not used here.

    Calls ``sys.exit(-1)`` after listing the names in ``CHECKS`` if
    ``run_checks`` raises ``UnknownCheckException``, and ``sys.exit(1)``
    if any returned status reports ``success`` as falsy.
    """
    targets = list(CHECKS) if not service_name else [service_name]
    try:
        outcomes = run_checks(targets)
    except UnknownCheckException:
        print("Services available are:")
        for known_name in CHECKS:
            print("- {}".format(known_name))
        sys.exit(-1)
    else:
        self.print_results(outcomes)
        # Entries are indexed rather than unpacked; position 1 holds the
        # status object.
        any_failed = any(not entry[1].success for entry in outcomes)
        if any_failed:
            sys.exit(1)
def check_services(request):
    """Run all checks listed in ``service_checks.CHECKS`` and show the results.

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        An ``HttpResponse`` containing a ``<pre>`` block in which each check
        contributes one ``<br/>``-terminated entry with its outcome word,
        duration in seconds, name and message.
    """
    # Local stdlib import keeps this block independent of module-level imports.
    from html import escape

    def get_message(service_name, result):
        """Build the HTML line describing a single check result."""
        if result.exception:
            status = "EXCEPTION"
            msg = repr(result.exception)
        else:
            status = "SUCCESS" if result.success else "FAILURE"
            msg = result.msg
        # Escape the message before embedding it in markup: it is plain
        # text, and characters such as "<" (common in exception reprs)
        # would otherwise be interpreted as HTML by the browser.
        safe_msg = escape(str(msg))
        return "{} (Took {:6.2f}s) {:15}: {}<br/>".format(
            status, result.duration, service_name, safe_msg)

    statuses = run_checks(list(service_checks.CHECKS))
    results = [
        get_message(name, status)
        for name, status in statuses
    ]
    return HttpResponse("<pre>" + "".join(results) + "</pre>")
def server_up(req):
    """
    Health-check view that server monitoring tools like 'pingdom' can poll.

    Responses:
        HttpResponse("success") if none of the selected checks failed, or
        if some failed while a deploy is in progress.
        HttpResponse(error_message, status=500) describing each failed
        check otherwise.

    Selecting checks:
        serverup.txt                    every check flagged ``always_check``
        serverup.txt?only={check_name}  only that check; a name missing from
                                        ``CHECKS`` falls back to the default
                                        selection
        serverup.txt?{check_name}       also include a non-default check
                                        (currently only ``heartbeat``)
    """
    single = req.GET.get('only')
    if single and single in CHECKS:
        checks_to_do = [single]
    else:
        checks_to_do = []
        for name, info in CHECKS.items():
            # Selected when enabled by default or named in the query string.
            if info['always_check'] or req.GET.get(name) is not None:
                checks_to_do.append(name)

    statuses = run_checks(checks_to_do)

    # Report each check's duration together with its outcome.
    for check_name, status in statuses:
        state = 'ok' if status.success else 'failed'
        tags = [
            'status:{}'.format(state),
            'check:{}'.format(check_name),
        ]
        datadog_gauge('commcare.serverup.check', status.duration, tags=tags)

    failures = [(name, outcome) for name, outcome in statuses if not outcome.success]

    # No failures, or failures that are ignored while a deploy is running.
    if not failures or is_deploy_in_progress():
        return HttpResponse("success")

    paragraphs = []
    for name, outcome in failures:
        summary = '<strong>{}</strong>: {}'.format(name, html.escape(outcome.msg))
        paragraphs.append(html.linebreaks(summary.strip()))

    create_datadog_event(
        'Serverup check failed',
        '\n'.join(paragraphs),
        alert_type='error',
        aggregation_key='serverup',
    )

    # Prefix the body with the host name reported by os.uname().
    heading = 'Failed Checks (%s):' % os.uname()[1]
    return HttpResponse(heading + ''.join(paragraphs), status=500)