def get_metrics_for_node(skale, node, is_test_mode):
    """Collect ping metrics for a node and merge in its health checks.

    In test mode GOOD_IP is pinged and no further checks are made. Otherwise
    the node's own IP is pinged, and the results of the containers
    healthcheck and the sChains check are OR-ed into the 'is_offline' entry.

    :param skale: passed through to check_schains_for_node
    :param node: mapping that provides the 'ip' and 'id' keys
    :param is_test_mode: truthy to ping GOOD_IP and skip the health checks
    :return: the dict returned by get_ping_node_results, with 'is_offline'
        updated when the health checks were run
    """
    if is_test_mode:
        target = GOOD_IP
    else:
        target = node['ip']

    node_metrics = get_ping_node_results(target)

    if not is_test_mode:
        containers_status = get_containers_healthcheck(target)
        schains_status = check_schains_for_node(skale, node['id'], target)
        # Any failing check marks the node as offline.
        ping_offline = node_metrics['is_offline']
        node_metrics['is_offline'] = (
            ping_offline | containers_status | schains_status
        )

    logger.info(f'Received metrics from node ID = {node["id"]}: {node_metrics}')
    return node_metrics
def check_schain(schain, node_ip):
    """Probe an sChain endpoint by requesting its current block number.

    :param schain: mapping that provides the 'name' and 'http_rpc_port' keys
    :param node_ip: passed to get_schain_endpoint to build the endpoint
    :return: 0 if the block number was read, 1 if any exception occurred
    """
    name = schain['name']
    endpoint = get_schain_endpoint(node_ip, schain['http_rpc_port'])
    logger.info(f'\nChecking {name}: {endpoint}')

    try:
        provider = HTTPProvider(endpoint)
        current_block = Web3(provider).eth.blockNumber
        logger.info(f"Current block number for {name} = {current_block}")
    except Exception as err:
        logger.error(f'Error occurred while getting block number : {err}')
        return 1
    return 0
def get_ping_node_results(host) -> dict:
    """Ping a host and return its metrics (downtime and latency).

    Three pings are sent with the '-w1' ping option. The host is reported
    offline when no average round-trip time could be parsed or when at least
    one packet was lost.

    :param host: destination host handed to the ping transmitter
    :return: dict with 'is_offline' (bool) and 'latency' (int); latency is
        -1 when offline, otherwise int(rtt_avg * 1000)
    """
    ping_parser = pingparsing.PingParsing()
    transmitter = pingparsing.PingTransmitter()
    transmitter.destination_host = host
    transmitter.ping_option = '-w1'
    transmitter.count = 3

    # Parse the raw ping output once and reuse the dict, instead of
    # re-parsing the same result for every field that is read.
    stats = ping_parser.parse(transmitter.ping()).as_dict()
    rtt_avg = stats['rtt_avg']

    if rtt_avg is None or stats['packet_loss_count'] > 0:
        logger.info('No connection to host!')
        return {'is_offline': True, 'latency': -1}

    return {'is_offline': False, 'latency': int(rtt_avg * 1000)}
def get_containers_healthcheck(host):
    """Return 0 if OK or 1 if failed.

    Requests the containers healthcheck URL for the host (15 second timeout)
    and returns 1 when any of the following holds:

    * the request raises an exception;
    * the status code is not OK;
    * the body is not valid JSON or is not a JSON object;
    * the payload has a non-null 'error' entry;
    * the payload has no 'data' entry;
    * any container in 'data' is not running or is paused.

    :param host: passed to get_containers_healthcheck_url to build the URL
    :return: 0 when every listed container is running and not paused, else 1
    """
    url = get_containers_healthcheck_url(host)
    logger.info(f'Checking: {url}')

    try:
        response = requests.get(url, timeout=15)
    except requests.exceptions.ConnectionError as err:
        logger.info(f'Could not connect to {url}')
        logger.error(err)
        return 1
    except Exception as err:
        logger.info(f'Could not get data from {url}')
        logger.error(err)
        return 1

    if response.status_code != requests.codes.ok:
        logger.info(f'Request failed, status code: {response.status_code}')
        return 1

    # A body that is not JSON must count as a failed check rather than
    # raising out of a function whose contract is to return 0 or 1.
    try:
        res = response.json()
    except ValueError as err:
        logger.info(f'Could not parse response from {url}')
        logger.error(err)
        return 1

    # Valid JSON that is not an object (e.g. a list) has no 'error'/'data'.
    if not isinstance(res, dict):
        logger.info(f'Unexpected response format from {url}')
        return 1

    if res.get('error') is not None:
        logger.info(res['error'])
        return 1

    data = res.get('data')
    if data is None:
        logger.info(f'No data found checking {url}')
        return 1

    for container in data:
        state = container['state']
        if not state['Running'] or state['Paused']:
            logger.info(f'{container["name"]} is not running or paused')
            return 1
    return 0