Пример #1
0
    def jmx_thread_method(nodes_to_monitor,
                          ignite_nodes,
                          jmx_node_id,
                          metrics_to_collect,
                          timeout=5):
        """
        Probe thread that collects JMX metrics from the specified nodes.

        A JmxUtility instance is created and initialised manually with
        ``jmx_node_id`` and ``ignite_nodes``; it is always shut down through
        ``kill_manually()`` when the probe finishes, even after an error.

        The probe polls in a loop: every cycle it reads one character from the
        file named 'lock' and stops as soon as that character is '1'.
        Otherwise it samples every metric of every monitored node, stores the
        values under the timestamp taken at the start of the cycle, and then
        sleeps ``timeout`` seconds.

        A metric that cannot be read (or cannot be converted to int when its
        optional 'type' is 'int') is recorded as None instead of aborting the
        probe. Any other error is logged and the results collected so far are
        returned.

        :param nodes_to_monitor: nodes that we want to monitor (server nodes in this example)
        :param ignite_nodes: nodes from current Ignite app (need to get PID)
        :param jmx_node_id: jmx node id from tiden.ignite.nodes
        :param metrics_to_collect: {'attr': {'grp': 'Group', 'bean': 'Bean', 'attribute': 'Attr'}, ...}
            an optional 'type': 'int' entry converts the collected value to int
        :param timeout: pause in seconds between two collection cycles
        :return: collected results ('default' python type):
            {timestamp: {node_idx: {metric_name: value or None}}}
        """
        jmx_metric = {}

        jmx = None
        try:
            jmx = JmxUtility()
            jmx.initialize_manually(jmx_node_id, ignite_nodes)

            with open('lock', 'r') as f:
                while True:
                    # NOTE(review): the file is opened once and read(1)
                    # advances the read position, so the stop flag is only
                    # seen when '1' shows up at the current offset - confirm
                    # this matches how the lock file is written.
                    if f.read(1) == '1':
                        log_print("Background probe JMX has been interrupted")
                        break

                    current_time = get_current_time()
                    for node_idx in nodes_to_monitor:
                        node_metrics = jmx_metric \
                            .setdefault(current_time, {}) \
                            .setdefault(node_idx, {})

                        for name, metric in metrics_to_collect.items():
                            try:
                                attribute = metric['attribute']
                                value = jmx.get_attributes(node_idx,
                                                           metric['grp'],
                                                           metric['bean'],
                                                           attribute,
                                                           )[attribute]

                                # 'type' is optional: a missing key must not
                                # discard a value that was read successfully.
                                if metric.get('type') == 'int':
                                    value = int(value)
                            except Exception:
                                # Best effort: one unreadable metric must not
                                # stop the whole probe.
                                value = None

                            node_metrics[name] = value

                    # Sleep once per cycle (not once per node) so that all
                    # nodes stored under one timestamp are sampled together
                    # and the stop flag is checked every `timeout` seconds.
                    sleep(timeout)
        except Exception:
            log_print(traceback.format_exc())
        finally:
            # Close connections and shutdown gateway properly
            if jmx:
                jmx.kill_manually()

        return jmx_metric
Пример #2
0
    def heap_thread_method(nodes_to_monitor,
                           ignite_nodes,
                           ssh_config,
                           timeout=5):
        """
        Probe thread that collects JVM heap usage from nodes_to_monitor.

        Command to collect: "jcmd PID GC.class_histogram"
        This command prints following text:

        "PID:
        1. JAVA_OBJECT_NUM JAVA_OBJECT_SIZE JAVA_OBJECT_NAME
        ...
        N.
        Total TOTAL_OBJECTS TOTAL_OBJECTS_SIZE"

        So we need to collect PID (to match it to node) and TOTAL_OBJECTS_SIZE from that output.

        The probe polls in a loop: every cycle it reads one character from the
        file named 'lock' and stops as soon as that character is '1'.
        Otherwise it runs the command for every monitored node, stores the
        parsed sizes under the current timestamp and sleeps ``timeout``
        seconds. Any error is logged and the results collected so far are
        returned.

        :param nodes_to_monitor: nodes that we want to monitor (server nodes in this example)
        :param ignite_nodes: nodes from current Ignite app (need to get PID)
        :param ssh_config: config['ssh_config'] from tiden config (Need to initialize SshPool)
        :param timeout: pause in seconds between two collection cycles
        :return: collected results ('default' python type):
            {timestamp: {node_id: total_objects_size}}
        """
        ssh = SshPool(ssh_config)
        ssh.connect()

        heap_result = {}

        # Each match is a (pid, total_size) tuple with exactly one non-empty
        # element: the "PID:" header fills the first group, the "Total" line
        # fills the second.
        histogram_pattern = re.compile(r'(\d+):\n|Total\s+\d+\s+(\d+)')

        try:
            with open('lock', 'r') as f:
                while True:
                    if f.read(1) == '1':
                        log_print("Background probe HEAP has been interrupted")
                        break

                    commands = {}
                    pid_to_node = {}

                    for node_idx in nodes_to_monitor:
                        pid = ignite_nodes[node_idx]['PID']
                        # Keyed by text so the PID captured from the command
                        # output can be looked up directly.
                        pid_to_node.setdefault(str(pid), node_idx)
                        commands.setdefault(
                            ignite_nodes[node_idx]['host'], []
                        ).append('jcmd %s GC.class_histogram' % pid)

                    results = ssh.exec(commands)

                    results_parsed = {}
                    for host_outputs in results.values():
                        # Several monitored nodes may share one host, so every
                        # output is parsed, not only the first one.
                        # assumes ssh.exec returns one output string per
                        # submitted command for each host - TODO confirm
                        for output in host_outputs:
                            matches = histogram_pattern.findall(output)

                            pid_text = next(
                                (pid for pid, _ in matches if pid), None)
                            if pid_text is None:
                                # Without a PID header the output cannot be
                                # attributed to any node.
                                continue

                            total_text = next(
                                (total for _, total in matches if total), None)

                            # An unknown PID falls back to node id 0; a missing
                            # "Total" line is reported as 0.
                            node_id = pid_to_node.get(pid_text, 0)
                            results_parsed[node_id] = \
                                int(total_text) if total_text else 0

                    heap_result[get_current_time()] = results_parsed

                    sleep(timeout)
        except Exception:
            log_print(traceback.format_exc())

        return heap_result
Пример #3
0
    def cpu_mem_thread_method(nodes_to_monitor,
                              ignite_nodes,
                              ssh_config,
                              timeout=5):
        """
        Probe thread that collects cpu,mem from nodes_to_monitor.

        Command to collect: "ps -p PID -o pid,%%cpu,%%mem"

        The probe polls in a loop: every cycle it reads one character from the
        file named 'lock' and stops as soon as that character is '1'.
        Otherwise it runs the command for every monitored node, stores the
        parsed values under the current timestamp and sleeps ``timeout``
        seconds. Any error inside the loop is logged and the results collected
        so far are returned.

        :param nodes_to_monitor: nodes that we want to monitor (server nodes in this example)
        :param ignite_nodes: nodes from current Ignite app (need to get PID)
        :param ssh_config: config['ssh_config'] from tiden config (Need to initialize SshPool)
        :param timeout: timeout between data collect
        :return: collected results ('default' python type):
            {timestamp: {node_id: (cpu, mem)}}
        """
        # Imported locally because this function did not use it before.
        import traceback

        ssh = SshPool(ssh_config)
        ssh.connect()

        cpu_mem_result = {}

        # "<pid> <cpu> <mem>": the numeric columns may carry a decimal part.
        ps_pattern = re.compile(
            r'(\d+)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)')

        try:
            with open('lock', 'r') as f:
                while True:
                    if f.read(1) == '1':
                        log_print("Background probe CPU has been interrupted")
                        break

                    commands = {}
                    pid_to_node = {}

                    for node_idx in nodes_to_monitor:
                        pid = ignite_nodes[node_idx]['PID']
                        # Keyed by text so the PID captured from the command
                        # output can be looked up directly.
                        pid_to_node.setdefault(str(pid), node_idx)
                        commands.setdefault(
                            ignite_nodes[node_idx]['host'], []
                        ).append('ps -p %s -o pid,%%cpu,%%mem' % pid)

                    results = ssh.exec(commands)

                    results_parsed = {}
                    for host_outputs in results.values():
                        # Several monitored nodes may share one host, so every
                        # output is parsed, not only the first one.
                        # assumes ssh.exec returns one output string per
                        # submitted command for each host - TODO confirm
                        for output in host_outputs:
                            found = ps_pattern.search(output)
                            if not found:
                                continue

                            # An unknown PID falls back to node id 0.
                            node_id = pid_to_node.get(found.group(1), 0)
                            results_parsed[node_id] = (float(found.group(2)),
                                                       float(found.group(3)))

                    cpu_mem_result[get_current_time()] = results_parsed

                    sleep(timeout)
        except Exception:
            # Same policy as the other probes: log the failure and keep the
            # samples collected so far instead of losing them.
            log_print(traceback.format_exc())

        return cpu_mem_result
 def write_event(self, name):
     """
     store a named event under the current time

     :param name: value stored in self.events for the current timestamp
     """
     events = self.events
     events[get_current_time()] = name
 def stop(self, **kwargs):
     """
     stop probe: remember the current time as the probe end time

     :param kwargs: accepted but not used by this method
     """
     finished_at = get_current_time()
     self.end_time = finished_at
 def start(self):
     """
     start probe: remember the current time as the probe start time
     """
     started_at = get_current_time()
     self.start_time = started_at