Пример #1
0
def main(argv):
    """Report metrics for the processes matching each configured pattern.

    Prints a JSON list with one record per configured pattern. Each record
    holds the number of matching processes, an ``is_process_up`` flag and
    the average/total CPU, memory and thread figures of those processes.

    Args:
        argv: Argument list. ``argv[2]`` is parsed as the integer config
            version and ``argv[3]`` is used as the host id. With fewer than
            four entries an error is printed and nothing else happens.

    Raises:
        ValueError: If ``argv[2]`` is not an integer.

    Notes:
        * The pattern list is re-fetched through ``ConfigClient`` only when
          the version in ``argv[2]`` differs from the cached one.
        * A process is matched with ``fnmatch`` against its command line,
          falling back to the ``comm`` name when the command line is empty.
        * ``cache.counter_to_gauge`` presumably returns the change since the
          previous run (``None`` when unknown) -- confirm against CacheProxy.
    """
    if len(argv) < 4:
        print("error: parameters missing")
        return

    cache = CacheProxy('plugin_process')
    # try/finally guarantees the cache is closed on every exit path,
    # including exceptions raised while parsing argv or fetching the config.
    try:
        last_config_version = cache.get('version')
        config_version = int(argv[2])
        host_id = argv[3]
        if last_config_version is None or config_version != last_config_version:
            config_client = ConfigClient()
            current_version, config_process_list = config_client.get_user_config(
                'plugin_process', host_id)
            if config_process_list is not None:
                cache.set('process_list', config_process_list)
                cache.set('version', current_version)
        else:
            config_process_list = cache.get('process_list')

        if not config_process_list:
            return

        ntp_checked, timestamp = time_util.get_ntp_time()

        cpu_total_jiffies = cache.counter_to_gauge('cpu_total_jiffies',
                                                   get_cpu_total_jiffies())
        total_mem = get_total_mem()
        pids = get_pids()
        page_size = resource.getpagesize()

        process_info_list = []

        for pid in pids:
            stat_path = '/proc/%d/stat' % pid
            if not os.path.isfile(stat_path):
                continue

            try:
                with open(stat_path, 'r') as f_stat:
                    line = f_stat.readline()

                # Field 2 (comm) is parenthesised and may itself contain
                # spaces or ')'. Split around the LAST ')' so the numeric
                # fields keep their documented positions.
                head, sep, tail = line.rpartition(')')
                if not sep:
                    continue
                pid_field, _, name = head.partition('(')
                # Rebuild the usual whitespace-split layout:
                # [pid, '(comm)', state, ppid, ...].
                values = [pid_field.strip(), '(%s)' % name] + tail.split()
                if len(values) < 24:
                    continue

                # Prefer the full command line when it is readable and
                # non-empty.
                cmdline_path = '/proc/%d/cmdline' % pid
                if os.path.isfile(cmdline_path) and os.access(
                        cmdline_path, os.R_OK):
                    with open(cmdline_path, 'r') as f_cmd:
                        cmdline = f_cmd.readline().replace('\0', ' ').strip()
                    if cmdline:
                        name = cmdline

                matched = [p for p in config_process_list if fnmatch(name, p)]
                if not matched:
                    continue

                # Sample the per-pid CPU counter exactly once, even when the
                # process matches several patterns; sampling it per pattern
                # would compare the counter against the value just stored.
                used_cpu_jiff = cache.counter_to_gauge(
                    'used_cpu_jiff_%d' % pid,
                    long(values[13]) + long(values[14]))

                # A missing or zero total means the share is undefined;
                # report 0.0 instead of dropping the process.
                if used_cpu_jiff is None or not cpu_total_jiffies:
                    cpu_usert = 0.0
                else:
                    cpu_usert = used_cpu_jiff * 100.0 / cpu_total_jiffies

                mem = long(values[23]) * page_size
                if not total_mem:
                    mem_usert = 0.0
                else:
                    # NOTE(review): the /1024 presumes get_total_mem()
                    # returns KiB while ``mem`` is in bytes -- confirm.
                    mem_usert = mem * 100.0 / total_mem / 1024

                vir_mem = float(values[22]) / 1024.0
                thread_num = int(values[19])

                for p in matched:
                    process_info_list.append({
                        'pid': pid,
                        'name': name,
                        'match': p,
                        'parent_pid': values[3],
                        'proc_stat_cd': values[2],
                        'cpu_usert': cpu_usert,
                        'mem_usert': mem_usert,
                        'mem_byt_cnt': vir_mem,
                        'thd_cnt': thread_num,
                    })
            except Exception:
                # Best effort: a process may exit between listing and
                # reading, or expose malformed data. Skip it and carry on.
                continue

        out_list = []
        for p in config_process_list:
            pid_list = []
            process_count = 0
            tot_cpu_usert = 0.0
            tot_mem_usert = 0.0
            tot_mem_byt_cnt = 0.0
            tot_thd_cnt = 0

            for process_info in process_info_list:
                if process_info['match'] == p:
                    process_count += 1
                    tot_cpu_usert += process_info['cpu_usert']
                    tot_mem_usert += process_info['mem_usert']
                    tot_mem_byt_cnt += process_info['mem_byt_cnt']
                    tot_thd_cnt += process_info['thd_cnt']
                    pid_list.append(process_info['pid'])

            # Compare against the pids recorded by the previous run, then
            # store the current pids for the next run.
            pid_list_record = cache.get('pip_list_record_' + p)
            cache.set('pip_list_record_' + p, pid_list)
            if not pid_list_record:
                # No history: "up" simply means something matches now.
                is_process_up = 1 if pid_list else 0
            else:
                # With history: "up" only if every previous pid is still
                # present.
                is_process_up = 1
                for pid in pid_list_record:
                    if pid not in pid_list:
                        is_process_up = 0
                        break

            if process_count == 0:
                avg_cpu_usert = 0.0
                avg_mem_usert = 0.0
                avg_mem_byt_cnt = 0.0
                avg_thd_cnt = 0
            else:
                avg_cpu_usert = tot_cpu_usert / process_count
                avg_mem_usert = tot_mem_usert / process_count
                avg_mem_byt_cnt = tot_mem_byt_cnt / process_count
                avg_thd_cnt = tot_thd_cnt / float(process_count)

            metrics = {
                'is_process_up': is_process_up,
                'process_count': process_count,
                'avg_cpu_usert': avg_cpu_usert,
                'avg_mem_usert': avg_mem_usert,
                'avg_mem_byt_cnt': avg_mem_byt_cnt,
                'avg_thd_cnt': avg_thd_cnt,
                'tot_cpu_usert': tot_cpu_usert,
                'tot_mem_usert': tot_mem_usert,
                'tot_mem_byt_cnt': tot_mem_byt_cnt,
                'tot_thd_cnt': tot_thd_cnt
            }

            out_list.append({
                'dimensions': {'proc_name': p},
                'metrics': metrics,
                'timestamp': timestamp,
                'ntp_checked': ntp_checked
            })

        print(json.dumps(out_list))
        sys.stdout.flush()
    finally:
        cache.close()
Пример #2
0
def main(argv):
    """Report per-CPU and averaged CPU time ratios from ``/proc/stat``.

    For every ``cpuN`` line the seven counters user, nice, system, idle,
    iowait, irq and softirq are turned into gauges through
    ``cache.counter_to_gauge`` and expressed as a percentage of the gauge of
    their sum. One record per CPU is emitted, followed by one summary record
    (``schema_type: svr``) with the averages over all CPUs and the maximum
    "used" ratio. The result is printed as a JSON list; nothing is printed
    when there is no data.

    Args:
        argv: Unused.

    Notes:
        ``cache.counter_to_gauge`` presumably returns the change since the
        previous run, or ``None`` when unknown -- confirm against CacheProxy.
    """
    # Cache-key prefixes, in /proc/stat column order (fields 1..7).
    counter_names = ('user', 'nice', 'system', 'idle', 'iowait', 'irq',
                     'softirq')
    # (counter, per-CPU metric key) in the order the metrics are emitted.
    per_cpu_keys = (
        ('user', 'user_rto'),
        ('system', 'sys_rto'),
        ('nice', 'nice_rto'),
        ('idle', 'idle_rto'),
        ('irq', 'irq_rto'),
        ('softirq', 'softirq_rto'),
        ('iowait', 'io_wait_rto'),
    )
    # (counter, summary metric key) in the order the summary is emitted.
    summary_keys = (
        ('idle', 'avg_cpu_idle_rto'),
        ('user', 'avg_cpu_user_rto'),
        ('system', 'avg_cpu_sys_rto'),
        ('nice', 'avg_nice_rto'),
        ('irq', 'avg_irq_rto'),
        ('softirq', 'avg_softirq_rto'),
        ('iowait', 'avg_io_wait_rto'),
    )
    # Counters that count as "used" time (everything except idle).
    busy_counters = ('user', 'nice', 'system', 'iowait', 'irq', 'softirq')

    cache = CacheProxy('cpu')
    # try/finally + with: the cache and the file are released even when
    # parsing fails.
    try:
        out_list = []

        ratio_sum = dict.fromkeys(counter_names, 0)
        ratio_seen = dict.fromkeys(counter_names, 0)
        used_sum = 0
        used_seen = 0
        max_used = 0.0

        ntp_checked, timestamp = time_util.get_ntp_time()

        with open('/proc/stat') as f:
            for line in f:
                values = line.split()
                # Blank or short lines cannot be per-CPU lines.
                if len(values) < 8:
                    continue
                label = values[0]
                # Only "cpuN" lines; the aggregate "cpu" line is skipped.
                if len(label) <= 3 or label[0:3] != 'cpu':
                    continue
                cpu_id = int(label[3:])

                raw = [long(v) for v in values[1:8]]
                gauges = {}
                for counter, count in zip(counter_names, raw):
                    gauges[counter] = cache.counter_to_gauge(
                        '%s_%d' % (counter, cpu_id), count)
                total = cache.counter_to_gauge('total_%d' % cpu_id, sum(raw))

                # Without a non-zero total no ratio can be computed.
                if not total:
                    continue

                metrics = {}
                for counter, key in per_cpu_keys:
                    gauge = gauges[counter]
                    if gauge is not None:
                        metrics[key] = gauge * 100.0 / total
                        ratio_sum[counter] += metrics[key]
                        ratio_seen[counter] += 1

                busy = [gauges[counter] for counter in busy_counters]
                if all(gauge is not None for gauge in busy):
                    metrics['used_rto'] = sum(busy) * 100.0 / total
                    used_sum += metrics['used_rto']
                    if metrics['used_rto'] > max_used:
                        max_used = metrics['used_rto']
                    used_seen += 1

                if metrics:
                    out_list.append({
                        'dimensions': {'cpu_idx': cpu_id},
                        'metrics': metrics,
                        'timestamp': timestamp,
                        'ntp_checked': ntp_checked
                    })

        summary = {}
        if used_seen > 0:
            summary['avg_cpu_used_rto'] = used_sum / used_seen
            summary['max_cpu_used_rto'] = max_used
        for counter, key in summary_keys:
            if ratio_seen[counter] > 0:
                summary[key] = ratio_sum[counter] / ratio_seen[counter]

        if summary:
            out_list.append({
                'dimensions': {
                    'schema_type': 'svr'
                },
                'metrics': summary,
                'timestamp': timestamp,
                'ntp_checked': ntp_checked
            })

        if out_list:
            print(json.dumps(out_list))
            sys.stdout.flush()
    finally:
        cache.close()
Пример #3
0
def main(argv):
    """Report per-device and averaged disk I/O metrics from ``/proc/diskstats``.

    Each device line with a non-zero read count has its counters turned
    into gauges through ``cache.counter_to_gauge``. One record per accepted
    device is emitted, followed by a summary record (``schema_type: svr``)
    with average and maximum read/write figures. The result is printed as a
    JSON list; nothing is printed when there is no data.

    Args:
        argv: Unused.

    Notes:
        * A device is skipped when its name contains the name of the
          previously accepted device (e.g. ``sda1`` after ``sda``).
        * The divisions by 60 presumably turn per-run deltas into per-second
          rates for a 60-second collection interval -- confirm with the
          scheduler.
        * ``max_*`` summary values are the raw per-run deltas, not divided
          by 60, unlike the ``avg_*`` values.
    """
    cache = CacheProxy('disk')
    # try/finally + with: the cache and the file are released even when
    # parsing fails.
    try:
        out_list = []

        avg_read_byt = 0.0
        avg_write_byt = 0.0
        avg_read = 0.0
        avg_write = 0.0
        max_read_byt = 0
        max_write_byt = 0
        max_read = 0
        max_write = 0

        count_read_byt = 0
        count_write_byt = 0
        count_read = 0
        count_write = 0

        last_disk_name = None

        ntp_checked, timestamp = time_util.get_ntp_time()

        with open("/proc/diskstats") as f_diskstats:
            for line in f_diskstats:
                values = line.split()
                # Fields up to index 13 are read below; skip short lines.
                if len(values) < 14:
                    continue
                if values[3] == "0":
                    continue

                disk_idx = values[2]

                rd_ios = cache.counter_to_gauge('rd_ios_%s' % disk_idx,
                                                long(values[3]))
                rd_sectors = cache.counter_to_gauge('rd_sectors_%s' % disk_idx,
                                                    long(values[5]))
                wr_ios = cache.counter_to_gauge('wr_ios_%s' % disk_idx,
                                                long(values[7]))
                wr_sectors = cache.counter_to_gauge('wr_sectors_%s' % disk_idx,
                                                    long(values[9]))
                io_ticks = cache.counter_to_gauge('io_ticks_%s' % disk_idx,
                                                  long(values[12]))
                time_in_queue = cache.counter_to_gauge(
                    'time_in_queue_%s' % disk_idx, long(values[13]))

                if rd_ios is None or wr_ios is None:
                    continue

                if last_disk_name and disk_idx.find(last_disk_name) > -1:
                    continue
                last_disk_name = disk_idx

                # Reset EVERY derived value per device. avgqu_sz and
                # used_rto used to be left unset, which either raised on
                # the first device or leaked the previous device's value.
                read_byt_cnt = None
                write_byt_cnt = None
                avgrq_sz = None
                avgqu_sz = None
                await_tm = None
                svc_tm = None
                used_rto = None

                ios = rd_ios + wr_ios

                # The kernel reports these counters in 512-byte sectors.
                if rd_sectors is not None:
                    read_byt_cnt = rd_sectors * 512
                if wr_sectors is not None:
                    write_byt_cnt = wr_sectors * 512

                if rd_sectors is not None and wr_sectors is not None:
                    if ios > 0:
                        avgrq_sz = (rd_sectors + wr_sectors) / float(ios)
                    else:
                        avgrq_sz = 0.0
                if time_in_queue is not None:
                    avgqu_sz = time_in_queue / 1000.0 / 60.0
                    if ios > 0:
                        await_tm = time_in_queue / float(ios)
                    else:
                        await_tm = 0.0
                if io_ticks is not None:
                    if ios > 0:
                        svc_tm = io_ticks / float(ios)
                    else:
                        svc_tm = 0.0
                    used_rto = io_ticks * 100 / 1000.0 / 60.0

                metrics = {}

                if read_byt_cnt is not None:
                    metrics['read_byt_cnt'] = read_byt_cnt / 60.0
                    avg_read_byt += read_byt_cnt
                    if read_byt_cnt > max_read_byt:
                        max_read_byt = read_byt_cnt
                    count_read_byt += 1
                if write_byt_cnt is not None:
                    metrics['write_byt_cnt'] = write_byt_cnt / 60.0
                    avg_write_byt += write_byt_cnt
                    if write_byt_cnt > max_write_byt:
                        max_write_byt = write_byt_cnt
                    count_write_byt += 1

                # rd_ios / wr_ios are known to be non-None here.
                metrics['read_cnt'] = rd_ios / 60.0
                avg_read += rd_ios
                if rd_ios > max_read:
                    max_read = rd_ios
                count_read += 1

                metrics['write_cnt'] = wr_ios / 60.0
                avg_write += wr_ios
                if wr_ios > max_write:
                    max_write = wr_ios
                count_write += 1

                if avgrq_sz is not None:
                    metrics['avgrq_sz'] = avgrq_sz
                if avgqu_sz is not None:
                    metrics['avgqu_sz'] = avgqu_sz
                if await_tm is not None:
                    metrics['await_tm'] = await_tm
                if svc_tm is not None:
                    metrics['svc_tm'] = svc_tm
                if used_rto is not None:
                    metrics['used_rto'] = used_rto

                out_list.append({
                    'dimensions': {'disk_idx': disk_idx},
                    'metrics': metrics,
                    'timestamp': timestamp,
                    'ntp_checked': ntp_checked
                })

        summary = {}

        if count_read_byt > 0:
            summary['avg_read_byt_cnt'] = avg_read_byt / 60.0 / count_read_byt
            summary['max_read_byt_cnt'] = max_read_byt
        if count_write_byt > 0:
            summary['avg_write_byt_cnt'] = avg_write_byt / 60.0 / count_write_byt
            summary['max_write_byt_cnt'] = max_write_byt
        if count_read > 0:
            summary['avg_read_cnt'] = avg_read / 60.0 / count_read
            summary['max_read_cnt'] = max_read
        if count_write > 0:
            summary['avg_write_cnt'] = avg_write / 60.0 / count_write
            summary['max_write_cnt'] = max_write

        if summary:
            out_list.append({
                'dimensions': {
                    'schema_type': 'svr'
                },
                'metrics': summary,
                'timestamp': timestamp,
                'ntp_checked': ntp_checked
            })

        if out_list:
            print(json.dumps(out_list))
            sys.stdout.flush()
    finally:
        cache.close()
Пример #4
0
def main(argv):
    """Report per-process metrics (top entries) plus a host-wide summary.

    Every pid returned by ``get_pids()`` is sampled from
    ``/proc/<pid>/stat``. Each sample is offered to ``top10_check_insert``,
    and the entries kept in ``top10`` are emitted followed by one summary
    record (``schema_type: svr``) with the process count, the summed memory
    share and the run-queue length. The result is printed as a JSON list.

    Args:
        argv: Unused.

    Notes:
        * ``get_cpu_core_count()`` and ``get_uptime()`` are read once before
          the loop instead of once per process, so a failure in either now
          propagates instead of silently skipping every process.
        * ``cache.counter_to_gauge`` presumably returns the change since the
          previous run, or ``None`` when unknown -- confirm against
          CacheProxy.
    """
    HZ = get_HZ()
    total_mem = get_total_mem()

    cache = CacheProxy('process')
    # try/finally guarantees the cache is closed on every exit path.
    try:
        cpu_total_jiffies = cache.counter_to_gauge('cpu_total_jiffies',
                                                   get_cpu_total_jiffies())

        pids = get_pids()

        mem_usert_total = 0.0
        count = 0
        top10 = []

        ntp_checked, timestamp = time_util.get_ntp_time()
        page_size = resource.getpagesize()

        # Loop-invariant host facts, hoisted out of the per-process loop.
        cpu_core_cnt = get_cpu_core_count()
        boot_time = time.time() - get_uptime()

        for pid in pids:
            stat_path = '/proc/%d/stat' % pid
            if not os.path.isfile(stat_path):
                continue

            try:
                with open(stat_path, 'r') as f_stat:
                    line = f_stat.readline()

                # Field 2 (comm) is parenthesised and may itself contain
                # spaces or ')'. Split around the LAST ')' so the numeric
                # fields keep their documented positions.
                head, sep, tail = line.rpartition(')')
                if not sep:
                    continue
                pid_field, _, name = head.partition('(')
                # Rebuild the usual whitespace-split layout:
                # [pid, '(comm)', state, ppid, ...].
                values = [pid_field.strip(), '(%s)' % name] + tail.split()
                if len(values) < 24:
                    continue

                status = values[2]
                ppid = values[3]

                used_cpu_jiff = cache.counter_to_gauge(
                    'used_cpu_jiff_%d' % pid,
                    long(values[13]) + long(values[14]))

                # A missing or zero total means the CPU share is undefined;
                # omit the metric instead of dropping the whole process.
                if used_cpu_jiff is None or not cpu_total_jiffies:
                    cpu_usert = None
                else:
                    cpu_usert = used_cpu_jiff * 100.0 / cpu_total_jiffies

                mem = long(values[23]) * page_size
                if not total_mem:
                    mem_usert = 0.0
                else:
                    # NOTE(review): the /1024 presumes get_total_mem()
                    # returns KiB while ``mem`` is in bytes -- confirm.
                    mem_usert = mem * 100.0 / total_mem / 1024

                vir_mem = float(values[22]) / 1024.0
                cpu_time = (float(values[13]) + float(values[14])) / HZ
                priority = int(values[17])
                nice = int(values[18])
                thread_num = int(values[19])

                # Field 22 (index 21) is the start time in clock ticks
                # since boot.
                start_time = boot_time + float(values[21]) / HZ

                dimensions = {
                    'proc_id': pid,
                    'parent_pid': ppid,
                    'proc_nm': name,
                    'proc_stat_cd': status
                }
                dimensions['strt_ymdt'] = long(start_time * 1000)
                dimensions['proc_cpu_cnt'] = cpu_core_cnt

                metrics = {}
                if cpu_usert is not None:
                    metrics['cpu_usert'] = cpu_usert
                    metrics['proc_t_cpu_usert'] = cpu_usert

                metrics['p_proc_mem_usert'] = mem_usert
                mem_usert_total += mem_usert

                metrics['vir_mem_byt_cnt'] = vir_mem
                metrics['cpu_tm_ss'] = cpu_time
                metrics['prit_rnk'] = priority
                metrics['nice_val'] = nice
                metrics['thd_cnt'] = thread_num

                top10_check_insert(top10, {
                    'dimensions': dimensions,
                    'metrics': metrics,
                    'timestamp': timestamp,
                    'ntp_checked': ntp_checked
                })
                count += 1
            except Exception:
                # Best effort: a process may exit between listing and
                # reading, or expose malformed data. Skip it and carry on.
                continue

        out_list = list(top10)
        out_list.append({
            'dimensions': {
                'schema_type': 'svr'
            },
            'metrics': {
                'proc_cnt': count,
                'proc_mem_usert': mem_usert_total,
                'run_que_len': get_run_que_len()
            },
            'timestamp': timestamp,
            'ntp_checked': ntp_checked
        })

        print(json.dumps(out_list))
        sys.stdout.flush()
    finally:
        cache.close()
Пример #5
0
def main(argv):
    """Report memory, swap and paging metrics as a one-element JSON list.

    Memory and swap sizes are read from ``/proc/meminfo`` and divided by
    1024. The paging counters ``pgpgin``, ``pgpgout`` and ``pgfault`` from
    ``/proc/vmstat`` are turned into gauges through
    ``cache.counter_to_gauge``.

    Args:
        argv: Unused.

    Raises:
        KeyError: If ``/proc/meminfo`` lacks ``MemTotal``, ``MemFree``,
            ``Buffers``, ``Cached`` or ``SwapTotal``, or lacks ``SwapFree``
            while swap is configured.

    Notes:
        * ``Shmem`` is optional; ``shared_mem_mb`` is emitted only when the
          field exists.
        * A paging metric is omitted when its vmstat line is missing or its
          gauge is ``None``.
        * The divisions by 60 presumably turn per-run deltas into per-second
          rates for a 60-second collection interval -- confirm with the
          scheduler.
    """
    wanted = ('MemTotal:', 'MemFree:', 'Shmem:', 'Buffers:', 'Cached:',
              'SwapTotal:', 'SwapFree:')

    cache = CacheProxy('memory')
    # try/finally + with: the cache and both files are released even when
    # parsing fails.
    try:
        # Collect the fields first, so nothing depends on their order in
        # the file.
        meminfo = {}
        with open('/proc/meminfo') as f_mem_info:
            for line in f_mem_info:
                values = line.split()
                if len(values) >= 2 and values[0] in wanted:
                    meminfo[values[0]] = long(values[1]) / 1024.0

        mem_mb = meminfo['MemTotal:']
        free_mem_mb = meminfo['MemFree:']
        bffr_mb = meminfo['Buffers:']
        cache_mb = meminfo['Cached:']
        swap_mb = meminfo['SwapTotal:']

        if swap_mb > 0.0:
            swap_free_mb = meminfo['SwapFree:']
            swap_used_mb = swap_mb - swap_free_mb
            swap_usert = swap_used_mb * 100 / swap_mb
        else:
            swap_free_mb = 0.0
            swap_used_mb = 0.0
            swap_usert = 0.0

        # Buffers and page cache are not counted as used memory.
        used_mem_mb = mem_mb - free_mem_mb - bffr_mb - cache_mb
        if mem_mb > 0.0:
            mem_usert = used_mem_mb * 100 / mem_mb
        else:
            mem_usert = 0.0

        # Default to None so a missing vmstat line omits the metric instead
        # of raising NameError further down.
        pgin_mb = None
        pgout_mb = None
        pgfault = None
        with open('/proc/vmstat') as f_vmstat:
            for line in f_vmstat:
                values = line.split()
                if len(values) < 2:
                    continue
                if values[0] == 'pgpgin':
                    pgin_mb = cache.counter_to_gauge('pgin', long(values[1]))
                elif values[0] == 'pgpgout':
                    pgout_mb = cache.counter_to_gauge('pgout', long(values[1]))
                elif values[0] == 'pgfault':
                    pgfault = cache.counter_to_gauge('pgfault',
                                                     long(values[1]))
    finally:
        cache.close()

    dimensions = {}
    metrics = {}
    ntp_checked, timestamp = time_util.get_ntp_time()

    metrics['mem_mb'] = mem_mb
    metrics['free_mem_mb'] = free_mem_mb
    metrics['used_mem_mb'] = used_mem_mb
    metrics['mem_usert'] = mem_usert
    if 'Shmem:' in meminfo:
        metrics['shared_mem_mb'] = meminfo['Shmem:']
    metrics['bffr_mb'] = bffr_mb
    metrics['cache_mb'] = cache_mb
    metrics['swap_mb'] = swap_mb
    metrics['swap_free_mb'] = swap_free_mb
    metrics['swap_used_mb'] = swap_used_mb
    metrics['swap_usert'] = swap_usert

    if pgin_mb is not None:
        metrics['pgin_mb'] = pgin_mb / 1024.0 / 60
    if pgout_mb is not None:
        metrics['pgout_mb'] = pgout_mb / 1024.0 / 60
    if pgfault is not None:
        # NOTE(review): unlike the two metrics above this divides by an
        # int, so an integer gauge is truncated under Python 2. Kept as is
        # because consumers may rely on it -- confirm whether 60.0 is meant.
        metrics['pgfault_tcnt'] = pgfault / 60

    out = {
        'dimensions': dimensions,
        'metrics': metrics,
        'timestamp': timestamp,
        'ntp_checked': ntp_checked
    }
    out_list = [out]
    print(json.dumps(out_list))
    sys.stdout.flush()