def test_get_oasis_prometheus_error_if_metric_not_found(self, _):
    """Expect MetricNotFoundException when PARAMS_WITH_MISSING is requested."""
    try:
        get_oasis_prometheus(ENDPOINT, PARAMS_WITH_MISSING, LOGGER)
    except MetricNotFoundException:
        # This is the outcome the test is looking for.
        pass
    else:
        # The call returned normally, so the expected error never happened.
        self.fail('Expected MetricNotFoundException')
def test_get_oasis_prometheus_result_if_endpoint_has_all_results(self, _):
    """Expect the call with ENDPOINT and PARAMS to return PROCESSED_RESULT."""
    self.assertEqual(
        PROCESSED_RESULT,
        get_oasis_prometheus(ENDPOINT, PARAMS, LOGGER))
def test_get_oasis_prometheus_error_if_no_params_where_given(self, _):
    """Expect NoParametersGivenException when an empty params dict is passed."""
    try:
        get_oasis_prometheus(ENDPOINT, {}, LOGGER)
    except NoParametersGivenException:
        # This is the outcome the test is looking for.
        pass
    else:
        # The call returned normally, so the expected error never happened.
        self.fail('Expected NoParametersGivenException')
def node_from_node_config(node_config: NodeConfig):
    """Validate a node's configuration against its API and build a Node.

    The checks run in this order: the API server is pinged, the node name is
    pinged through the API, validator key details are fetched (validators
    only), the Prometheus peers gauge is read, the Node Exporter endpoint is
    probed (only if a URL is configured) and the consensus/Tendermint keys
    are resolved (validators only). Progress is reported via log_and_print.

    Args:
        node_config: configuration of the node to initialise.

    Returns:
        The Node built from the configuration, after load_state() has been
        called on it.

    Raises:
        InitialisationException: if any of the checks raises. The underlying
            error is logged to logger_general and chained as the cause.
    """
    node_name = node_config.node_name
    api_url = node_config.node_api_url

    # Ping the API server to see if the API url is configured correctly
    log_and_print('Trying to retrieve data from the API of {}'.format(
        node_name))
    try:
        pong_response = oasis_api_data_wrapper.ping_api(api_url)
        if pong_response != "pong":
            log_and_print('WARNING: API of node {} is not reachable.'.format(
                node_name))
        else:
            # Only report success when the API actually answered "pong"
            log_and_print('Success. API is configured correctly')
    except Exception as e:
        logger_general.error(e)
        raise InitialisationException(
            'Failed to retrieve data from the API of {}'.format(
                node_name)) from e

    # Check if the node name exists by pinging the node through the API.
    # If it doesn't then it is misconfigured.
    log_and_print('Trying to retrieve node name {} from API'.format(
        node_name))
    try:
        pong_response = oasis_api_data_wrapper.ping_node(api_url, node_name)
        if pong_response != "pong":
            log_and_print(
                'WARNING: Node {} is not configured properly, PANIC node '
                'name should match that set in the API Server.'.format(
                    node_name))
        else:
            log_and_print('Success. node name is configured correctly')
    except Exception as e:
        logger_general.error(e)
        raise InitialisationException(
            'Failed to retrieve data from the API of {}'.format(
                node_name)) from e

    # Get node type
    if node_config.node_is_validator:
        node_type = NodeType.VALIDATOR_FULL_NODE
    else:
        node_type = NodeType.NON_VALIDATOR_FULL_NODE

    # Check if the node public key exists by calling the API to retrieve the
    # node successfully. Only validators have a public key to check.
    if node_config.node_is_validator:
        log_and_print(
            'Trying to retrieve Node Public Key of node {} from API'.format(
                node_name))
        try:
            node_details = oasis_api_data_wrapper.get_node(
                api_url, node_name, node_config.node_public_key)
            entity_public_key = node_details['entity_id']
            staking_address = oasis_api_data_wrapper.get_staking_address(
                api_url, entity_public_key)
        except Exception as e:
            logger_general.error(e)
            raise InitialisationException(
                'Failed validating node public key {}'.format(
                    node_config.node_public_key)) from e
    else:
        entity_public_key = EMPTY_URL
        staking_address = EMPTY_URL

    # Prometheus configuration should be checked on start up if Peers
    # monitoring is enabled.
    try:
        peers_response = oasis_api_data_wrapper.get_prometheus_gauge(
            api_url, node_name, "tendermint_p2p_peers")
        # NOTE(review): warning when the gauge IS an int looks inverted, and
        # "Success" is printed regardless of the outcome. The return contract
        # of get_prometheus_gauge is not visible here, so the original
        # behaviour is kept -- confirm against the wrapper.
        if isinstance(peers_response, int):
            log_and_print(
                'WARNING: Node {} does not have prometheus enabled. Please '
                'enable Prometheus to monitor data such as no of Peers'.format(
                    node_name))
        log_and_print('Success. Prometheus is configured correctly')
    except Exception as e:
        logger_general.error(e)
        raise InitialisationException(
            'Failed to retrieve Prometheus Data from API of {}'.format(
                node_name)) from e

    # Node Exporter should be an optional tool for System Monitoring
    if node_config.node_exporter_url != "":
        try:
            metric_to_test = ['process_cpu_seconds_total']
            prometheus_data = get_oasis_prometheus(
                node_config.node_exporter_url, metric_to_test, logger_general)
            # The value is not needed; the lookup only proves the metric is
            # present in the returned data (it raises otherwise).
            _ = prometheus_data['process_cpu_seconds_total']
            node_exporter_url = node_config.node_exporter_url
            log_and_print('Success. Node Exporter is configured correctly')
        except Exception as e:
            log_and_print(e)
            logger_general.error(e)
            raise InitialisationException(
                'Failed to retrieve Node Exporter Data from URL of {}'.format(
                    node_name)) from e
    else:
        node_exporter_url = EMPTY_URL

    # Retrieve the Consensus Public Key and the Tendermint Address
    log_and_print('Trying to convert the Node {} Key into a Consensus '
                  'Public Key and a Tendermint Address key '.format(
                      node_name))
    if node_config.node_is_validator:
        try:
            consensus_public_key = oasis_api_data_wrapper.get_registry_node(
                api_url, node_name, node_config.node_public_key)
            tendermint_address_key = \
                oasis_api_data_wrapper.get_tendermint_address(
                    api_url, str(consensus_public_key['consensus']['id']))
            log_and_print('Successfully converted node public key into '
                          'Consensus Public Key and Tendermint Address')
        except Exception as e:
            logger_general.error(e)
            raise InitialisationException(
                'Failed to convert a node public key for the node {}'.format(
                    node_name)) from e
    else:
        consensus_public_key = EMPTY_URL
        tendermint_address_key = EMPTY_URL

    chain_id = node_config.chain_name

    # Initialise node and load any state
    node = Node(node_name, api_url, node_exporter_url, node_type,
                node_config.node_public_key, chain_id, REDIS,
                node_config.is_archive_node, consensus_public_key,
                tendermint_address_key, staking_address, entity_public_key,
                internal_conf=InternalConf)
    node.load_state(logger_general)

    # Return node
    return node
def monitor(self) -> None:
    """Fetch system metrics from the Prometheus endpoint and store them.

    The metrics are requested in one call to get_oasis_prometheus. Each
    derived value (process CPU seconds, process memory usage, virtual memory
    usage, open file descriptors, system CPU, RAM and storage usage) is then
    computed and passed to the matching setter on self.system together with
    self.channels and self.logger.

    Every value is handled on a best-effort basis: a failure while computing
    or storing one value is logged as a warning and does not prevent the
    remaining values from being processed. Errors raised by
    get_oasis_prometheus itself are not caught here.
    """
    metrics_to_monitor = [
        'process_cpu_seconds_total',
        'go_memstats_alloc_bytes',
        'go_memstats_alloc_bytes_total',
        'process_virtual_memory_bytes',
        'process_max_fds',
        'process_open_fds',
        'node_cpu_seconds_total',
        'node_filesystem_avail_bytes',
        'node_filesystem_size_bytes',
        'node_memory_MemTotal_bytes',
        'node_memory_MemAvailable_bytes',
    ]
    prometheus_data = get_oasis_prometheus(self.prometheus_endpoint,
                                           metrics_to_monitor, self.logger)
    failure_msg = '%s could not update %s: %r'

    try:
        process_cpu_seconds_total = \
            prometheus_data['process_cpu_seconds_total']
        self._logger.debug('%s process_cpu_seconds_total: %s', self.system,
                           process_cpu_seconds_total)
        self.system.set_process_cpu_seconds_total(
            process_cpu_seconds_total, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system,
                             'process_cpu_seconds_total', e)

    try:
        process_memory_usage = \
            (prometheus_data['go_memstats_alloc_bytes']
             / prometheus_data['go_memstats_alloc_bytes_total']) * 100
        process_memory_usage = float("{:.2f}".format(process_memory_usage))
        # '%%' is a literal percent sign; a lone trailing '%' would make the
        # logging call fail while formatting the message.
        self._logger.debug('%s process_memory_usage: %s%%', self.system,
                           process_memory_usage)
        self.system.set_process_memory_usage(
            process_memory_usage, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system,
                             'process_memory_usage', e)

    try:
        virtual_memory_usage = prometheus_data['process_virtual_memory_bytes']
        self._logger.debug('%s virtual_memory_usage: %s', self.system,
                           virtual_memory_usage)
        self.system.set_virtual_memory_usage(
            virtual_memory_usage, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system,
                             'virtual_memory_usage', e)

    try:
        open_file_descriptors = \
            (prometheus_data['process_open_fds']
             / prometheus_data['process_max_fds']) * 100
        open_file_descriptors = float("{:.2f}".format(open_file_descriptors))
        self._logger.debug('%s open_file_descriptors: %s%%', self.system,
                           open_file_descriptors)
        self.system.set_open_file_descriptors(
            open_file_descriptors, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system,
                             'open_file_descriptors', e)

    try:
        node_cpu_seconds_idle = 0
        node_cpu_seconds_total = 0
        cpu_seconds = prometheus_data['node_cpu_seconds_total']
        # Each key is a JSON-encoded object that carries a 'mode' entry
        for labels in cpu_seconds:
            if json.loads(labels)['mode'] == 'idle':
                node_cpu_seconds_idle += cpu_seconds[labels]
            node_cpu_seconds_total += cpu_seconds[labels]
        system_cpu_usage = \
            100 - ((node_cpu_seconds_idle / node_cpu_seconds_total) * 100)
        system_cpu_usage = float("{:.2f}".format(system_cpu_usage))
        self._logger.debug('%s system_cpu_usage: %s%%', self.system,
                           system_cpu_usage)
        self.system.set_system_cpu_usage(
            system_cpu_usage, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system, 'system_cpu_usage', e)

    try:
        mem_total = prometheus_data['node_memory_MemTotal_bytes']
        mem_available = prometheus_data['node_memory_MemAvailable_bytes']
        system_ram_usage = ((mem_total - mem_available) / mem_total) * 100
        system_ram_usage = float("{:.2f}".format(system_ram_usage))
        self._logger.debug('%s system_ram_usage: %s%%', self.system,
                           system_ram_usage)
        self.system.set_system_ram_usage(
            system_ram_usage, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system, 'system_ram_usage', e)

    try:
        # Sum the available and total bytes over every reported filesystem
        avail_by_fs = prometheus_data['node_filesystem_avail_bytes']
        size_by_fs = prometheus_data['node_filesystem_size_bytes']
        node_filesystem_avail_bytes = sum(
            avail_by_fs[fs] for fs in avail_by_fs)
        node_filesystem_size_bytes = sum(
            size_by_fs[fs] for fs in size_by_fs)
        system_storage_usage = 100 - (
            (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100)
        system_storage_usage = float("{:.2f}".format(system_storage_usage))
        self._logger.debug('%s system_storage_usage: %s%%', self.system,
                           system_storage_usage)
        self.system.set_system_storage_usage(
            system_storage_usage, self.channels, self.logger)
    except Exception as e:
        self._logger.warning(failure_msg, self.system,
                             'system_storage_usage', e)

    # Output status. Kept separate from the storage step so that the status
    # is still reported when the storage figures could not be computed.
    try:
        self._logger.info('%s status: %s', self._monitor_name, self.status())
    except Exception as e:
        self._logger.warning(failure_msg, self.system, 'status output', e)
def get_node(nodes_so_far: List[NodeConfig],
             oasis_api_data_wrapper: OasisApiWrapper) -> Optional[NodeConfig]:
    """Interactively ask the user for the details of one node.

    The user is prompted, in order, for: a unique node name, the chain ID,
    the API url (pinged through oasis_api_data_wrapper), whether the node is
    a validator, an optional Node Exporter url (probed through
    get_oasis_prometheus), whether the node is an archive node, whether it
    should be monitored and, for validators only, the node public key.

    Args:
        nodes_so_far: node configurations gathered so far; their names are
            used to reject duplicate node names.
        oasis_api_data_wrapper: wrapper used to ping the API url that the
            user enters.

    Returns:
        A NodeConfig built from the answers, or None if the user decides not
        to add the node after a failed connection check or after leaving the
        validator public key empty.
    """
    # Get node's name
    node_names_so_far = [n.node_name for n in nodes_so_far]
    while True:
        node_name = input('Unique node name that is identical to the node name '
                          'specified in the API server configuration:\n')
        if node_name in node_names_so_far:
            print('Node name must be unique.')
        elif not node_name:
            print('Node name cannot be empty.')
        else:
            break

    # Get the current chain ID
    while True:
        chain_name = input('Node\'s chain ID this can be found at '
                           'https://oasis.smartstake.io/:\n')
        if chain_name:
            break
        print('Node\'s Chain ID cannot be empty.')

    # Get node's API Url
    while True:
        api_url = input('Node\'s API url (typically http://API_IP:8686):\n')
        print('Trying to connect to endpoint {}/api/ping'.format(api_url))
        try:
            oasis_api_data_wrapper.ping_api(api_url)
            print('Success.')
            break
        except Exception:
            if not yn_prompt('Failed to connect to endpoint. Do '
                             'you want to try again? (Y/n)\n'):
                if not yn_prompt(
                        'Do you still want to add the node? (Y/n)\n'):
                    return None
                # Keep the unverified url because the user asked to go on
                break

    # Ask if node is a validator
    node_is_validator = yn_prompt('Is this node a validator? (Y/n)\n')

    # Get Node's Node Exporter Url
    while True:
        node_exporter_url = input(
            'Node Exporter url (typically '
            'http://NODE_EXPORTER_URL:9100/metrics), this will be '
            'used to monitor the system statistics. If you do not '
            'wish to monitor system statistics leave it blank:\n')
        if not node_exporter_url:
            print('Skipping system monitoring setup, the system where the node '
                  'is installed will not be monitored')
            break

        print('Trying to access Node Exporter at {}'.format(node_exporter_url))
        try:
            metric_to_test = ['process_cpu_seconds_total']
            prometheus_data = get_oasis_prometheus(
                node_exporter_url, metric_to_test, DUMMY_LOGGER)
            process_cpu_seconds_total = \
                prometheus_data['process_cpu_seconds_total']
            # The API is deliberately not pinged again here: it was checked
            # above, and an unreachable API must not make a reachable Node
            # Exporter look like a failure.
            print('Successfully returned CPU seconds total',
                  process_cpu_seconds_total)
            break
        except Exception:
            if not yn_prompt('Failed to connect to endpoint. Do '
                             'you want to try again? (Y/n)\n'):
                if not yn_prompt(
                        'Do you still want to add the node? (Y/n)\n'):
                    return None
                # Keep the unverified url because the user asked to go on
                break

    # Ask if node is an archive node.
    # Note: if the node is a validator, it must also be an archive node.
    # However, it was done this way in case of changes in future updates.
    node_is_archive_node = yn_prompt('Is this node an archive node? (Y/n)\n')

    monitor_node = yn_prompt('Would you like to monitor this node? (Y/n) \n')

    # Get validator's node public key
    if node_is_validator:
        while True:
            # Surrounding whitespace is never part of the key, so store the
            # stripped value that the emptiness check is based on.
            node_public_key = input(
                'Node\'s public identifier, found inside '
                'the file entity.json within the key-value pair '
                '"nodes":"NODE_PUBLIC_KEY", found on the machine '
                'running the node (typically the format is : '
                'J4i/ADAze7jYjcmPZvTFHD/tMa3wt9AMeaQALPXZebs=) : ').strip()
            if node_public_key:
                break
            if not yn_prompt('You cannot leave the node_public_key '
                             'field empty for a validator. Do you want to '
                             'try again? (Y/n)\n'):
                return None
    else:
        node_public_key = ''

    # Return node
    return NodeConfig(node_name, chain_name, api_url, node_public_key,
                      node_is_validator, node_exporter_url, monitor_node,
                      node_is_archive_node, True)