예제 #1
0
    def check(self, instance):
        """Run the varnish check for one configured instance.

        Collects metrics by shelling out to varnishstat and, when a
        'varnishadm' path is configured, backend service checks via
        varnishadm.

        Instance keys used: varnishstat (required), tags, name,
        varnishadm, secretfile.

        Raises:
            Exception: if the instance has no 'varnishstat' path configured.
        """
        # The varnishstat binary path is mandatory; fail fast when missing.
        if instance.get("varnishstat", None) is None:
            raise Exception("varnishstat is not configured")
        tags = instance.get('tags', [])
        if tags is None:
            tags = []
        else:
            # De-duplicate user-supplied tags.
            tags = list(set(tags))
        varnishstat_path = instance.get("varnishstat")
        name = instance.get('name')

        # Get version and version-specific args from varnishstat -V.
        version, use_xml = self._get_version_info(varnishstat_path)

        # Parse metrics from varnishstat.
        # -x emits XML (newer varnish); -1 is the flat one-shot format.
        arg = '-x' if use_xml else '-1'
        cmd = [varnishstat_path, arg]

        if name is not None:
            # -n selects a named varnish instance on this host.
            cmd.extend(['-n', name])
            tags += [u'varnish_name:%s' % name]
        else:
            tags += [u'varnish_name:default']

        output, _, _ = get_subprocess_output(cmd, self.log)

        self._parse_varnishstat(output, use_xml, tags)

        # Parse service checks from varnishadm.
        varnishadm_path = instance.get('varnishadm')
        if varnishadm_path:
            secretfile_path = instance.get('secretfile', '/etc/varnish/secret')

            cmd = []
            # Non-root agents go through sudo to read the secret file.
            if geteuid() != 0:
                cmd.append('sudo')

            # The backend health command changed between varnish versions:
            # debug.health before 4.1, backend.list -p from 4.1 on.
            if version < LooseVersion('4.1.0'):
                cmd.extend(
                    [varnishadm_path, '-S', secretfile_path, 'debug.health'])
            else:
                cmd.extend([
                    varnishadm_path, '-S', secretfile_path, 'backend.list',
                    '-p'
                ])

            try:
                output, err, _ = get_subprocess_output(cmd, self.log)
            except OSError as e:
                self.log.error(
                    "There was an error running varnishadm. Make sure 'sudo' is available. %s",
                    e)
                output = None
            if err:
                self.log.error(
                    'Error getting service check from varnishadm: %s', err)

            if output:
                self._parse_varnishadm(output)
예제 #2
0
파일: ceph.py 프로젝트: Everlane/dd-agent
    def _collect_raw(self, ceph_cmd, instance):
        """Run a set of ceph CLI commands and return their parsed JSON output,
        keyed by command name with spaces replaced by underscores."""
        if _is_affirmative(instance.get('use_sudo', False)):
            # Verify the dd-agent user can actually run sudo before proceeding.
            if os.system('setsid sudo -l < /dev/null') != 0:
                raise Exception('The dd-agent user does not have sudo access')
            ceph_args = ['sudo', ceph_cmd]
        else:
            ceph_args = [ceph_cmd]

        # A failing `ceph version` means the CLI is unusable; abort loudly.
        version_cmd = ceph_args + ['version']
        try:
            output, _, _ = get_subprocess_output(version_cmd, self.log)
        except Exception as e:
            raise Exception('Unable to run cmd=%s: %s' % (' '.join(version_cmd), str(e)))

        raw = {}
        commands = ('mon_status', 'status', 'df detail', 'osd pool stats', 'osd perf', 'health detail')
        for cmd in commands:
            # Each command is asked for JSON output; failures are logged and skipped.
            try:
                full_cmd = ceph_args + cmd.split() + ['-fjson']
                output, _, _ = get_subprocess_output(full_cmd, self.log)
                res = json.loads(output)
            except Exception as e:
                self.log.warning('Unable to parse data from cmd=%s: %s' % (cmd, str(e)))
                continue
            raw[cmd.replace(' ', '_')] = res

        return raw
예제 #3
0
파일: ceph.py 프로젝트: wjsl/dd-agent
    def _collect_raw(self, ceph_cmd, instance):
        """Invoke the ceph CLI (optionally via sudo) and return parsed JSON
        results keyed by command name (spaces turned into underscores)."""
        use_sudo = _is_affirmative(instance.get('use_sudo', False))
        if use_sudo:
            # Make sure the dd-agent user is allowed to sudo before shelling out.
            if os.system('setsid sudo -l < /dev/null') != 0:
                raise Exception('The dd-agent user does not have sudo access')
            ceph_args = ['sudo', ceph_cmd]
        else:
            ceph_args = [ceph_cmd]

        # Sanity-check the CLI with `ceph version`; a failure here is fatal.
        version_cmd = ceph_args + ['version']
        try:
            output, _, _ = get_subprocess_output(version_cmd, self.log)
        except Exception as e:
            raise Exception('Unable to run cmd=%s: %s' %
                            (' '.join(version_cmd), str(e)))

        raw = {}
        for cmd in ('mon_status', 'status', 'df detail', 'osd pool stats',
                    'osd perf'):
            # Ask each subcommand for JSON; log and skip anything that fails.
            try:
                json_cmd = ceph_args + cmd.split() + ['-fjson']
                output, _, _ = get_subprocess_output(json_cmd, self.log)
                res = json.loads(output)
            except Exception as e:
                self.log.warning('Unable to parse data from cmd=%s: %s' %
                                 (cmd, str(e)))
                continue
            raw[cmd.replace(' ', '_')] = res

        return raw
예제 #4
0
파일: network.py 프로젝트: ross/dd-agent
    def _check_solaris(self, instance):
        """Collect per-interface and TCP metrics on Solaris.

        Bytes sent/received are not available from netstat on Solaris, so
        interface metrics come from `kstat -p link:0:` instead.
        """
        try:
            netstat, _, _ = get_subprocess_output(["kstat", "-p", "link:0:"], self.log)
            metrics_by_interface = self._parse_solaris_netstat(netstat)
            for interface, metrics in metrics_by_interface.iteritems():
                self._submit_devicemetrics(interface, metrics)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting kstat stats.")

        try:
            # BUG FIX: "-P" "tcp" were two adjacent string literals that Python
            # concatenates into the single argument "-Ptcp" (a missing comma).
            # Pass the protocol as its own argument, matching the documented
            # `netstat -s -P tcp` form.
            netstat, _, _ = get_subprocess_output(["netstat", "-s", "-P", "tcp"], self.log)
            # Sample output:
            # TCP: tcpRtoAlgorithm=     4 tcpRtoMin           =   200
            # tcpRtoMax           = 60000 tcpMaxConn          =    -1
            # tcpActiveOpens      =    57 tcpPassiveOpens     =    50
            # tcpAttemptFails     =     1 tcpEstabResets      =     0
            # tcpCurrEstab        =     0 tcpOutSegs          =   254
            # tcpOutDataSegs      =   995 tcpOutDataBytes     =1216733
            # tcpRetransSegs      =     0 tcpRetransBytes     =     0
            # tcpOutAck           =   185 tcpOutAckDelayed    =     4
            # ...
            self._submit_regexed_values(netstat, SOLARIS_TCP_METRICS)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")
예제 #5
0
    def _check_solaris(self, instance):
        """Collect per-interface and TCP metrics on Solaris.

        Bytes sent/received are not available from netstat on Solaris, so
        interface metrics come from `kstat -p link:0:` instead.
        """
        try:
            netstat, _, _ = get_subprocess_output(["kstat", "-p", "link:0:"], self.log)
            metrics_by_interface = self._parse_solaris_netstat(netstat)
            for interface, metrics in metrics_by_interface.iteritems():
                self._submit_devicemetrics(interface, metrics)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting kstat stats.")

        try:
            # BUG FIX: "-P" "tcp" were two adjacent string literals that Python
            # concatenates into the single argument "-Ptcp" (a missing comma).
            # Pass the protocol as its own argument, matching the documented
            # `netstat -s -P tcp` form.
            netstat, _, _ = get_subprocess_output(["netstat", "-s", "-P", "tcp"], self.log)
            # Sample output:
            # TCP: tcpRtoAlgorithm=     4 tcpRtoMin           =   200
            # tcpRtoMax           = 60000 tcpMaxConn          =    -1
            # tcpActiveOpens      =    57 tcpPassiveOpens     =    50
            # tcpAttemptFails     =     1 tcpEstabResets      =     0
            # tcpCurrEstab        =     0 tcpOutSegs          =   254
            # tcpOutDataSegs      =   995 tcpOutDataBytes     =1216733
            # tcpRetransSegs      =     0 tcpRetransBytes     =     0
            # tcpOutAck           =   185 tcpOutAckDelayed    =     4
            # ...
            self._submit_regexed_values(netstat, SOLARIS_TCP_METRICS)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")
예제 #6
0
    def _check_bsd(self, instance):
        """Collect per-interface and TCP metrics on BSD via netstat.

        Returns False when the netstat -i output lacks the expected columns.
        """
        netstat_flags = ['-i', '-b']

        # -W avoids truncated interface names on FreeBSD.
        if Platform.is_freebsd():
            netstat_flags.append('-W')

        try:
            output, _, _ = get_subprocess_output(["netstat"] + netstat_flags,
                                                 self.log)
            lines = output.splitlines()

            headers = lines[0].split()

            # Bail out if any expected column is missing from the header row.
            for h in ("Ipkts", "Ierrs", "Ibytes", "Opkts", "Oerrs", "Obytes",
                      "Coll"):
                if h not in headers:
                    # BUG FIX: was self.logger, while every other log call in
                    # this method uses self.log; self.logger would raise
                    # AttributeError on the error path.
                    self.log.error("%s not found in %s; cannot parse" %
                                   (h, headers))
                    return False

            current = None
            for l in lines[1:]:
                # A second "Name" header row marks the start of another table.
                if "Name" in l:
                    break

                x = l.split()
                if len(x) == 0:
                    break

                iface = x[0]
                # Strip the trailing '*' marker some interfaces carry.
                if iface.endswith("*"):
                    iface = iface[:-1]
                # Only the first row per device is used; skip repeats.
                if iface == current:
                    continue
                else:
                    current = iface

                # Submit only when there is actual byte traffic.
                if self._parse_value(x[-5]) or self._parse_value(x[-2]):
                    iface = current
                    metrics = {
                        'bytes_rcvd': self._parse_value(x[-5]),
                        'bytes_sent': self._parse_value(x[-2]),
                        'packets_in.count': self._parse_value(x[-7]),
                        'packets_in.error': self._parse_value(x[-6]),
                        'packets_out.count': self._parse_value(x[-4]),
                        'packets_out.error': self._parse_value(x[-3]),
                    }
                    self._submit_devicemetrics(iface, metrics)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting connection stats.")

        try:
            # BUG FIX: "-p" "tcp" were adjacent literals concatenated to
            # "-ptcp" (a missing comma); pass the protocol as a separate
            # argument, matching the documented `netstat -s -p tcp`.
            netstat, _, _ = get_subprocess_output(
                ["netstat", "-s", "-p", "tcp"], self.log)

            self._submit_regexed_values(netstat, BSD_TCP_METRICS)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")
예제 #7
0
    def _get_nsd_control_stats(self, tags, metrics):
        """Fetch `nsd-control stats` output and submit nsd.* metrics.

        When `metrics` is empty, every reported stat is submitted; otherwise
        only names listed in `metrics`. Stats whose name contains 'num.' go
        through rate(), everything else through gauge().

        Raises:
            Exception: when not root and the agent user lacks sudo access.
        """
        output = None
        if os.geteuid() == 0:
            # dd-agent is running as root (not recommended)
            # BUG FIX: get_subprocess_output returns a (stdout, stderr, code)
            # tuple; the original assigned the whole tuple to `output`, which
            # would break the re.findall below. Unpack like the sudo branch.
            output, _, _ = get_subprocess_output(['nsd-control', 'stats'],
                                                 self.log, False)
        else:
            # can dd-agent user run sudo?
            test_sudo = os.system('setsid sudo -l < /dev/null')
            if test_sudo == 0:
                output, _, _ = get_subprocess_output(
                    ['sudo', 'nsd-control', 'stats'], self.log, False)
            else:
                raise Exception('The dd-agent user does not have sudo access')

        # Stats lines have the shape "name=value".
        for metric in re.findall(r'(\S+)=(.*\d)', output):
            if len(metrics) == 0 or metric[0] in metrics:
                self.log.debug('nsd.{}:{}'.format(metric[0], metric[1]))
                if 'num.' in metric[0]:
                    self.rate(METRIC_PREFIX + metric[0],
                              float(metric[1]),
                              tags=tags)
                else:
                    self.gauge(METRIC_PREFIX + metric[0],
                               float(metric[1]),
                               tags=tags)
예제 #8
0
def get_system_stats():
    """Gather basic host facts: machine/platform/processor/python version,
    CPU core count, and an OS-version entry keyed per platform family."""
    stats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    platf = sys.platform

    try:
        if Platform.is_linux(platf):
            # /proc/cpuinfo has one "model name" line per logical CPU.
            cpuinfo, _, _ = get_subprocess_output(
                ['grep', 'model name', '/proc/cpuinfo'], log)
            stats['cpuCores'] = len(cpuinfo.splitlines())

        if Platform.is_darwin(platf) or Platform.is_freebsd(platf):
            sysctl_out, _, _ = get_subprocess_output(['sysctl', 'hw.ncpu'], log)
            stats['cpuCores'] = int(sysctl_out.split(': ')[1])
    except SubprocessOutputEmptyError as e:
        log.warning("unable to retrieve number of cpuCores. Failed with error %s", e)

    # OS version, one key per platform family.
    if Platform.is_linux(platf):
        stats['nixV'] = platform.dist()
    elif Platform.is_darwin(platf):
        stats['macV'] = platform.mac_ver()
    elif Platform.is_freebsd(platf):
        # platform.uname()[2] is the release string; FreeBSD has no codename.
        stats['fbsdV'] = ('freebsd', platform.uname()[2], '')
    elif Platform.is_win32(platf):
        stats['winV'] = platform.win32_ver()

    return stats
예제 #9
0
    def _get_postqueue_stats(self, postfix_config_dir, tags):
        """Count messages in the postfix queue via `postqueue -p` and submit
        postfix.queue.size gauges for the active, hold and deferred queues."""

        # get some interesting configuration values from postconf
        pc_output, _, _ = get_subprocess_output(['postconf', 'mail_version'],
                                                self.log, False)
        postfix_version = pc_output.strip('\n').split('=')[1].strip()
        pc_output, _, _ = get_subprocess_output(
            ['postconf', 'authorized_mailq_users'], self.log, False)
        authorized_mailq_users = pc_output.strip('\n').split('=')[1].strip()

        self.log.debug(
            'authorized_mailq_users : {}'.format(authorized_mailq_users))

        output, _, _ = get_subprocess_output(
            ['postqueue', '-c', postfix_config_dir, '-p'], self.log, False)

        active_count = 0
        hold_count = 0
        deferred_count = 0

        # postqueue -p sample output
        '''
        root@postfix:/opt/datadog-agent/agent/checks.d# postqueue -p
        ----Queue ID----- --Size-- ---Arrival Time---- --Sender/Recipient------
        3xWyLP6Nmfz23fk        367 Tue Aug 15 16:17:33 [email protected]
                                                            (deferred transport)
                                                            [email protected]

        3xWyD86NwZz23ff!       358 Tue Aug 15 16:12:08 [email protected]
                                                            (deferred transport)
                                                            [email protected]

        -- 1 Kbytes in 2 Requests.
        '''

        # Lines containing '*' are counted as active, '!' as held; any other
        # line starting with a digit (a queue ID) is counted as deferred.
        for line in output.splitlines():
            if '*' in line:
                active_count += 1
                continue
            if '!' in line:
                hold_count += 1
                continue
            if line[0:1].isdigit():
                deferred_count += 1

        self.log.debug('Postfix Version: %s' % postfix_version)

        # One gauge per queue, tagged with the queue name and config dir.
        self.gauge('postfix.queue.size',
                   active_count,
                   tags=tags +
                   ['queue:active', 'instance:{}'.format(postfix_config_dir)])
        self.gauge('postfix.queue.size',
                   hold_count,
                   tags=tags +
                   ['queue:hold', 'instance:{}'.format(postfix_config_dir)])
        self.gauge(
            'postfix.queue.size',
            deferred_count,
            tags=tags +
            ['queue:deferred', 'instance:{}'.format(postfix_config_dir)])
예제 #10
0
파일: config.py 프로젝트: bsandvik/dd-agent
def get_system_stats():
    """Return a dict of basic system information for agent metadata."""
    platf = sys.platform
    systemStats = {
        'machine': platform.machine(),
        'platform': platf,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    try:
        if Platform.is_linux(platf):
            # Count logical CPUs as "model name" lines in /proc/cpuinfo.
            out, _, _ = get_subprocess_output(['grep', 'model name', '/proc/cpuinfo'], log)
            systemStats['cpuCores'] = len(out.splitlines())
        if Platform.is_darwin(platf) or Platform.is_freebsd(platf):
            out, _, _ = get_subprocess_output(['sysctl', 'hw.ncpu'], log)
            systemStats['cpuCores'] = int(out.split(': ')[1])
    except SubprocessOutputEmptyError as e:
        log.warning("unable to retrieve number of cpuCores. Failed with error %s", e)

    # OS version, keyed per platform family.
    if Platform.is_linux(platf):
        systemStats['nixV'] = platform.dist()
    elif Platform.is_darwin(platf):
        systemStats['macV'] = platform.mac_ver()
    elif Platform.is_freebsd(platf):
        systemStats['fbsdV'] = ('freebsd', platform.uname()[2], '')  # no codename for FreeBSD
    elif Platform.is_win32(platf):
        systemStats['winV'] = platform.win32_ver()

    return systemStats
예제 #11
0
    def check(self, instance):
        """Collect varnishstat metrics; optionally run varnishadm (via sudo)
        for backend health service checks."""
        if instance.get("varnishstat", None) is None:
            raise Exception("varnishstat is not configured")

        configured_tags = instance.get('tags', [])
        tags = [] if configured_tags is None else list(set(configured_tags))
        varnishstat_path = instance.get("varnishstat")
        name = instance.get('name')

        # varnishstat -V tells us the version and whether XML output applies.
        version, use_xml = self._get_version_info(varnishstat_path)

        # XML output (-x) when supported, otherwise the flat '-1' format.
        cmd = [varnishstat_path, '-x' if use_xml else '-1']
        if name is None:
            tags.append(u'varnish_name:default')
        else:
            # -n selects a named varnish instance.
            cmd.extend(['-n', name])
            tags.append(u'varnish_name:%s' % name)

        output, _, _ = get_subprocess_output(cmd, self.log)
        self._parse_varnishstat(output, use_xml, tags)

        # Service checks come from varnishadm, always run through sudo here.
        varnishadm_path = instance.get('varnishadm')
        if varnishadm_path:
            secretfile_path = instance.get('secretfile', '/etc/varnish/secret')
            adm_cmd = ['sudo', varnishadm_path, '-S', secretfile_path, 'debug.health']
            output, _, _ = get_subprocess_output(adm_cmd, self.log)
            if output:
                self._parse_varnishadm(output)
예제 #12
0
    def check(self, agentConfig):
        """Collect per-device IO statistics via iostat.

        Returns a dict {device: {stat: value}}, filtered by the optional
        device_blacklist_re config entry (a compiled regex), or False when
        the platform is unsupported or collection fails.
        """
        io = {}
        try:
            if Platform.is_linux():
                stdout, _, _ = get_subprocess_output(['iostat', '-d', '1', '2', '-x', '-k'], self.logger)
                io.update(self._parse_linux2(stdout))

            elif sys.platform == "sunos5":
                output, _, _ = get_subprocess_output(["iostat", "-x", "-d", "1", "2"], self.logger)
                iostat = output.splitlines()
                lines = [l for l in iostat if len(l) > 0]
                # Keep only the second report (the first is the since-boot
                # average). Floor division is identical to `/` under Python 2
                # here and stays correct under Python 3.
                lines = lines[len(lines) // 2:]

                assert "extended device statistics" in lines[0]
                headers = lines[1].split()
                assert "device" in headers
                for l in lines[2:]:
                    cols = l.split()
                    io[cols[0]] = {}
                    for i in range(1, len(cols)):
                        io[cols[0]][self.xlate(headers[i], "sunos")] = cols[i]

            elif sys.platform.startswith("freebsd"):
                output, _, _ = get_subprocess_output(["iostat", "-x", "-d", "1", "2"], self.logger)
                iostat = output.splitlines()
                lines = [l for l in iostat if len(l) > 0]
                # Same second-report selection as the sunos5 branch.
                lines = lines[len(lines) // 2:]

                assert "extended device statistics" in lines[0]
                headers = lines[1].split()
                assert "device" in headers
                for l in lines[2:]:
                    cols = l.split()
                    io[cols[0]] = {}
                    for i in range(1, len(cols)):
                        io[cols[0]][self.xlate(headers[i], "freebsd")] = cols[i]
            elif sys.platform == 'darwin':
                iostat, _, _ = get_subprocess_output(['iostat', '-d', '-c', '2', '-w', '1'], self.logger)
                io = self._parse_darwin(iostat)
            else:
                return False

            # Drop devices matching the configured blacklist regex, if any.
            device_blacklist_re = agentConfig.get('device_blacklist_re', None)
            if device_blacklist_re:
                filtered_io = {}
                for device, stats in io.iteritems():
                    if not device_blacklist_re.match(device):
                        filtered_io[device] = stats
            else:
                filtered_io = io
            return filtered_io

        except Exception:
            self.logger.exception("Cannot extract IO statistics")
            return False
예제 #13
0
    def check(self, instance):
        """Collect varnishstat metrics and varnishadm backend service checks
        for one configured instance."""
        # The varnishstat binary path is required.
        if instance.get("varnishstat", None) is None:
            raise Exception("varnishstat is not configured")

        raw_tags = instance.get('tags', [])
        tags = [] if raw_tags is None else list(set(raw_tags))
        varnishstat_path = instance.get("varnishstat")
        name = instance.get('name')

        # varnishstat -V gives the version and whether XML output is usable.
        version, use_xml = self._get_version_info(varnishstat_path)

        # Build the varnishstat command line.
        cmd = [varnishstat_path, '-x' if use_xml else '-1']
        if name is None:
            tags.append(u'varnish_name:default')
        else:
            cmd += ['-n', name]
            tags.append(u'varnish_name:%s' % name)

        output, _, _ = get_subprocess_output(cmd, self.log)
        self._parse_varnishstat(output, use_xml, tags)

        # Backend health via varnishadm, if configured.
        varnishadm_path = instance.get('varnishadm')
        if not varnishadm_path:
            return

        secretfile_path = instance.get('secretfile', '/etc/varnish/secret')

        # Non-root agents go through sudo.
        cmd = [] if geteuid() == 0 else ['sudo']
        # The backend health command was renamed between varnish versions.
        if version < LooseVersion('4.1.0'):
            cmd += [varnishadm_path, '-S', secretfile_path, 'debug.health']
        else:
            cmd += [varnishadm_path, '-S', secretfile_path, 'backend.list', '-p']

        try:
            output, err, _ = get_subprocess_output(cmd, self.log)
        except OSError as e:
            self.log.error("There was an error running varnishadm. Make sure 'sudo' is available. %s", e)
            output = None
        if err:
            self.log.error('Error getting service check from varnishadm: %s', err)

        if output:
            self._parse_varnishadm(output)
예제 #14
0
class Ceph(AgentCheck):
    """Collect raw status/health data from a Ceph cluster via the ceph CLI."""

    DEFAULT_CEPH_CMD = '/usr/bin/ceph'
    DEFAULT_CEPH_CLUSTER = 'ceph'
    NAMESPACE = "openstack.ceph"

    def _collect_raw(self, ceph_cmd, ceph_cluster, instance):
        """Run a set of `ceph ... -fjson` commands against the given cluster
        and return their parsed JSON output keyed by command name
        (spaces replaced with underscores)."""
        use_sudo = _is_affirmative(instance.get('use_sudo', False))
        ceph_args = []
        if use_sudo:
            # BUG FIX: os.popen(...).read() returns a *string*, which never
            # equals the integer 0, so this check always raised. Use
            # os.system's integer exit status instead (as sibling checks do).
            test_sudo = os.system('setsid sudo -l < /dev/null')
            if test_sudo != 0:
                raise Exception('The monitor-agent user does not have sudo access')
            ceph_args = ['sudo', ceph_cmd]
        else:
            # NOTE(review): both branches prefix 'sudo' in the original code;
            # preserved as-is since callers may depend on it, but the non-sudo
            # branch probably should be [ceph_cmd] — confirm intent.
            ceph_args = ['sudo', ceph_cmd]
        ceph_args += ["--cluster", ceph_cluster]

        # A failing `ceph version` means the CLI is unusable; abort loudly.
        args = ceph_args + ['version']
        try:
            output, _, _ = get_subprocess_output(args, self.log)
        except Exception as e:
            # Modernized from the Python-2-only `except Exception, e` syntax.
            raise Exception('Unable to run cmd=%s: %s' % (' '.join(args), str(e)))

        raw = {}
        for cmd in ('mon_status', 'status', 'df detail', 'osd stat', 'osd pool stats', 'health', 'mds stat'):
            # Each subcommand is asked for JSON; failures are logged and skipped.
            try:
                args = ceph_args + cmd.split() + ['-fjson']
                output, _, _ = get_subprocess_output(args, self.log)
                res = json.loads(output)
            except Exception as e:
                self.log.warning('Unable to parse data from cmd=%s: %s' % (cmd, str(e)))
                continue
            name = cmd.replace(' ', '_')
            raw[name] = res
        return raw
예제 #15
0
    def check(self, instance):
        """Parse nfs iostat command output into per-device metric submissions."""
        stat_out, _, _ = get_subprocess_output(self.nfs_cmd, self.log)
        custom_tags = instance.get("tags", [])
        all_devices = []
        this_device = []

        for line in stat_out.splitlines():
            if not line:
                continue
            if line.find('mounted on') >= 0 and len(this_device) > 0:
                # A new 'mounted on' header closes the previous device section.
                all_devices.append(Device(this_device, self.log))
                this_device = []
            this_device.append(line.strip().split())

        # Flush the final device section.
        all_devices.append(Device(this_device, self.log))

        # The first half of the reports is the moving average (report 1 of 2);
        # only the second half is submitted.
        all_devices = all_devices[len(all_devices) // 2:]

        for device in all_devices:
            device.send_metrics(self.gauge, custom_tags)
예제 #16
0
 def journalctl_entries(self, args):
     """Return kernel error-level journal entries as parsed JSON objects.

     Extra journalctl arguments are appended via `args`. Returns [] (and
     increments an error counter) on journalctl failure or malformed JSON.
     """
     out, err, exitCode = get_subprocess_output(
         [
             'journalctl',
             # One JSON object per line per entry.
             '-o',
             'json',
             # No reason to look at non-system logs.
             '--system',
             # Kernel logs.
             '_TRANSPORT=kernel',
             # At the "error" level.
             'PRIORITY=3'
         ] + args,
         self.log)
     if exitCode != 0:
         self.log.error('journalctl failed, code {0}: {1}'.format(
             exitCode, err))
         self.increment('oom.errors.je.failure')
         return []
     try:
         return [json.loads(line) for line in out.splitlines()]
     except ValueError:
         # BUG FIX: was a bare `except:`, which also swallows
         # KeyboardInterrupt/SystemExit. json.loads raises ValueError
         # (JSONDecodeError subclasses it) on malformed input.
         self.log.exception('json parsing failed')
         self.increment('oom.errors.je.jsonfail')
         return []
예제 #17
0
    def _get_version_info(self, varnishstat_path):
        """Determine the varnish major version from `varnishstat -V` and
        whether the XML output format (-x) can be used (version 3+)."""
        output, error, _ = get_subprocess_output([varnishstat_path, "-V"], self.log)

        # Default when the banner cannot be parsed.
        version = 3

        # Some versions print the banner on stdout, others on stderr.
        m1 = re.search(r"varnish-(\d+)", output, re.MULTILINE)
        m2 = re.search(r"varnish-(\d+)", error, re.MULTILINE)

        if m1 is not None:
            version = int(m1.group(1))
        elif m2 is not None:
            version = int(m2.group(1))
        else:
            self.log.warn("Cannot determine the version of varnishstat, assuming 3 or greater")
            self.warning("Cannot determine the version of varnishstat, assuming 3 or greater")

        self.log.debug("Varnish version: %d" % version)

        # XML output is used for version 3 and above.
        return version, version > 2
예제 #18
0
파일: unix.py 프로젝트: motusllc/dd-agent
    def check(self, agentConfig):
        """Return the raw `ps` process table plus the api key and hostname,
        or False when ps cannot be read."""
        # Honor the option to hide process arguments.
        ps_arg = 'aux' if agentConfig.get('exclude_process_args', False) else 'auxww'

        # Get output from ps
        try:
            output, _, _ = get_subprocess_output(['ps', ps_arg], self.logger)
            # splitlines() also removes a trailing empty line.
            processLines = output.splitlines()
            del processLines[0]  # drop the header row
        except Exception:
            self.logger.exception('getProcesses')
            return False

        # Each row: the first 10 whitespace-separated fields, then the command.
        processes = [map(lambda s: s.strip(), line.split(None, 10))
                     for line in processLines]

        return {'processes':   processes,
                'apiKey':      agentConfig['api_key'],
                'host':        get_hostname(agentConfig)}
예제 #19
0
    def _get_version_info(self, varnishstat_path):
        """Parse `varnishstat -V` to get the varnish major version and whether
        the XML output format (-x) can be used (version 3+)."""
        output, error, _ = get_subprocess_output([varnishstat_path, "-V"], self.log,
            raise_on_empty_output=False)

        # Assume 3+ when the banner cannot be parsed.
        version = 3

        # v2 prints the version on stderr, v3 on stdout; try stdout first.
        match = re.search(r"varnish-(\d+)", output, re.MULTILINE)
        if match is None:
            match = re.search(r"varnish-(\d+)", error, re.MULTILINE)

        if match is None:
            self.log.warn("Cannot determine the version of varnishstat, assuming 3 or greater")
            self.warning("Cannot determine the version of varnishstat, assuming 3 or greater")
        else:
            version = int(match.group(1))

        self.log.debug("Varnish version: %d" % version)

        # XML output is only used from varnish 3 onwards.
        return version, version > 2
예제 #20
0
    def check(self, agentConfig):
        """Return the raw `ps` process table plus the api key and hostname,
        or False when ps cannot be read."""
        process_exclude_args = agentConfig.get('exclude_process_args', False)
        if process_exclude_args:
            ps_arg = 'aux'
        else:
            ps_arg = 'auxww'
        # Get output from ps
        try:
            output, _, _ = get_subprocess_output(['ps', ps_arg], self.logger)
            processLines = output.splitlines(
            )  # Also removes a trailing empty line
            # BUG FIX: the header deletion previously happened *outside* the
            # try block, so an empty ps output raised an uncaught IndexError.
            # Delete it here, inside the existing error handling.
            del processLines[0]  # Removes the headers
        except Exception:
            # Widened from the Python-2-only StandardError, matching the
            # sibling implementation of this check.
            self.logger.exception('getProcesses')
            return False

        processes = []

        # Each row: the first 10 whitespace-separated fields, then the command.
        for line in processLines:
            line = line.split(None, 10)
            processes.append(map(lambda s: s.strip(), line))

        return {
            'processes': processes,
            'apiKey': agentConfig['api_key'],
            'host': get_hostname(agentConfig)
        }
예제 #21
0
    def _get_version_info(self, varnishstat_path):
        """Parse `varnishstat -V` to get the varnish version and whether the
        XML output format (-x) can be used.

        varnishstat_path is a command list (note the list concatenation).
        Returns (LooseVersion, bool).
        """
        # Get the varnish version from varnishstat
        output, error, _ = get_subprocess_output(varnishstat_path + ["-V"],
                                                 self.log,
                                                 raise_on_empty_output=False)

        # Assumptions regarding varnish's version
        use_xml = True
        version = LooseVersion('3.0.0')

        # BUG FIX: for a compiled pattern, .search()'s second positional
        # argument is `pos` (the start index), not a flags value. Passing
        # re.MULTILINE (== 8) silently skipped the first 8 characters of the
        # searched string. Flags belong on re.compile(), so drop the argument.
        m1 = self.version_pattern.search(output)
        # v2 prints the version on stderr, v3 on stdout
        m2 = self.version_pattern.search(error)

        if m1 is None and m2 is None:
            self.log.warn(
                "Cannot determine the version of varnishstat, assuming 3 or greater"
            )
            self.warning(
                "Cannot determine the version of varnishstat, assuming 3 or greater"
            )
        else:
            if m1 is not None:
                version = LooseVersion(m1.group())
            elif m2 is not None:
                version = LooseVersion(m2.group())

        self.log.debug("Varnish version: %s", version)

        # Location of varnishstat
        if version < LooseVersion('3.0.0'):
            use_xml = False

        return version, use_xml
예제 #22
0
    def check(self, agentConfig):
        """Compute 1/5/15-minute load averages, plus per-core normalized
        values when the core count is known. Returns False on failure."""
        if Platform.is_linux():
            try:
                with open('/proc/loadavg', 'r') as load_avg:
                    uptime = load_avg.readline().strip()
            except Exception:
                self.logger.exception('Cannot extract load')
                return False

        elif sys.platform in ('darwin', 'sunos5') or sys.platform.startswith("freebsd"):
            try:
                uptime, _, _ = get_subprocess_output(['uptime'], self.logger)
            except Exception:
                self.logger.exception('Cannot extract load')
                return False
        else:
            # BUG FIX: on any other platform `uptime` was never assigned and
            # the regex line below raised NameError. Report unsupported
            # platforms as a failure instead, like the other error paths.
            return False

        # Pull the three load figures; some locales print a decimal comma.
        load = [res.replace(',', '.') for res in re.findall(r'([0-9]+[\.,]\d+)', uptime)]

        try:
            cores = int(agentConfig.get('system_stats').get('cpuCores'))
            assert cores >= 1, "Cannot determine number of cores"
            return {'system.load.1': float(load[0]),
                    'system.load.5': float(load[1]),
                    'system.load.15': float(load[2]),
                    'system.load.norm.1': float(load[0]) / cores,
                    'system.load.norm.5': float(load[1]) / cores,
                    'system.load.norm.15': float(load[2]) / cores,
                    }
        except Exception:
            # Core count unavailable: fall back to the raw load values.
            return {'system.load.1': float(load[0]),
                    'system.load.5': float(load[1]),
                    'system.load.15': float(load[2])}
예제 #23
0
    def _get_proc_list(self):
        """Return the `ps aux[ww]` table as one list of stripped fields per
        row; the header row is not removed here."""
        # Get output from ps
        try:
            # Honor the option to hide process arguments.
            ps_arg = 'aux' if self.config.get('exclude_process_args', False) else 'auxww'
            output, _, _ = get_subprocess_output(['ps', ps_arg], self.log)
            # splitlines() also removes a trailing empty line.
            process_lines = output.splitlines()
        except Exception:
            self.log.exception('Cannot get process list')
            raise

        # Each row: the first 10 whitespace-separated columns, then the full
        # command string as one field.
        return [map(lambda field: field.strip(), row.split(None, 10))
                for row in process_lines]
예제 #24
0
파일: postfix.py 프로젝트: wjsl/dd-agent
    def _get_queue_count(self, directory, queues, tags):
        """Gauge the number of queued messages in each postfix queue.

        Args:
            directory: postfix spool directory (e.g. /var/spool/postfix).
            queues: iterable of queue sub-directory names to inspect.
            tags: base tags appended to each emitted metric.

        Raises:
            Exception: if a queue directory is missing, or the dd-agent
                user has no sudo access when not running as root.
        """
        for queue in queues:
            queue_path = os.path.join(directory, queue)
            if not os.path.exists(queue_path):
                raise Exception('%s does not exist' % queue_path)

            count = 0
            if os.geteuid() == 0:
                # dd-agent is running as root (not recommended)
                count = sum(
                    len(files) for root, dirs, files in os.walk(queue_path))
            else:
                # can dd-agent user run sudo?
                test_sudo = os.system('setsid sudo -l < /dev/null')
                if test_sudo == 0:
                    # BUGFIX: pass raise_on_empty_output=False (as the
                    # sibling variants do) -- an empty queue legitimately
                    # produces no `find` output and must count as zero
                    # rather than raise.
                    output, _, _ = get_subprocess_output(
                        ['sudo', 'find', queue_path, '-type', 'f'], self.log,
                        False)
                    count = len(output.splitlines())
                else:
                    raise Exception(
                        'The dd-agent user does not have sudo access')

            # emit an individually tagged metric
            self.gauge('postfix.queue.size',
                       count,
                       tags=tags + [
                           'queue:%s' % queue,
                           'instance:%s' % os.path.basename(directory)
                       ])
예제 #25
0
    def _get_version_info(self, varnishstat_path):
        """Determine the varnish major version and the stats output format.

        Runs ``varnishstat -V`` and parses the major version from either
        stream (varnish 2 prints it on stderr, varnish 3+ on stdout).

        Args:
            varnishstat_path: path to the varnishstat binary.

        Returns:
            (version, use_xml): int major version, and True when the XML
            ``-x`` output should be used (varnish 3+), False for the flat
            ``-1`` format (varnish <= 2).
        """
        output, error, _ = get_subprocess_output([varnishstat_path, "-V"],
                                                 self.log)

        # Assume varnish 3 or greater unless a version is found below.
        version = 3

        m1 = re.search(r"varnish-(\d+)", output, re.MULTILINE)
        # v2 prints the version on stderr, v3 on stdout
        m2 = re.search(r"varnish-(\d+)", error, re.MULTILINE)

        match = m1 or m2
        if match is None:
            # Surface the problem in both the agent log and the check
            # warnings; logger.warning replaces the deprecated .warn().
            self.log.warning(
                "Cannot determine the version of varnishstat, assuming 3 or greater"
            )
            self.warning(
                "Cannot determine the version of varnishstat, assuming 3 or greater"
            )
        else:
            version = int(match.group(1))

        # Lazy %-style args avoid formatting when debug logging is off.
        self.log.debug("Varnish version: %d", version)

        # varnish 2 and older only support the flat '-1' output format.
        use_xml = version > 2

        return version, use_xml
예제 #26
0
    def _get_queue_count(self, directory, queues, tags):
        """Emit a postfix.queue.size gauge for every queue directory.

        Counts the files in each queue under *directory*, either directly
        (when running as root) or via ``sudo find`` as the configured
        postfix user, tagging each gauge with queue name and instance.
        """
        running_as_root = (os.geteuid() == 0)
        instance_name = os.path.basename(directory)

        for queue in queues:
            queue_path = os.path.join(directory, queue)
            if not os.path.exists(queue_path):
                raise Exception('{} does not exist'.format(queue_path))

            if running_as_root:
                # dd-agent is running as root (not recommended)
                count = 0
                for _root, _dirs, files in os.walk(queue_path):
                    count += len(files)
            else:
                # can dd-agent user run sudo?
                if os.system('setsid sudo -l < /dev/null') != 0:
                    raise Exception(
                        'The dd-agent user does not have sudo access')
                # default to `root` for backward compatibility
                postfix_user = self.init_config.get('postfix_user', 'root')
                find_cmd = [
                    'sudo', '-u', postfix_user, 'find', queue_path,
                    '-type', 'f'
                ]
                output, _, _ = get_subprocess_output(find_cmd, self.log,
                                                     False)
                count = len(output.splitlines())

            # emit an individually tagged metric
            self.gauge('postfix.queue.size',
                       count,
                       tags=tags + [
                           'queue:{}'.format(queue), 'instance:{}'.format(
                               instance_name)
                       ])
예제 #27
0
    def check(self, instance):
        """Collect Linux system metrics from /proc and `ps`.

        Emits inode usage, context switches, processes created,
        interrupts, available entropy, and per-state / per-priority
        process counts.
        """
        tags = instance.get('tags', [])

        state_counts = defaultdict(int)

        prio_counts = defaultdict(int)

        proc_location = self.agentConfig.get('procfs_path', '/proc').rstrip('/')

        # Resolve each /proc-relative path against the configured root.
        # items() (not iteritems) is safe while reassigning values and
        # works on Python 3 as well.
        proc_path_map = {
            "inode_info": "sys/fs/inode-nr",
            "stat_info": "stat",
            "entropy_info": "sys/kernel/random/entropy_avail",
        }
        for key, path in list(proc_path_map.items()):
            proc_path_map[key] = "{procfs}/{path}".format(procfs=proc_location, path=path)

        with open(proc_path_map['inode_info'], 'r') as inode_info:
            inode_stats = inode_info.readline().split()
            self.gauge('system.inodes.total', float(inode_stats[0]), tags=tags)
            self.gauge('system.inodes.used', float(inode_stats[1]), tags=tags)

        with open(proc_path_map['stat_info'], 'r') as stat_info:
            lines = [line.strip() for line in stat_info.readlines()]

            for line in lines:
                if line.startswith('ctxt'):
                    ctxt_count = float(line.split(' ')[1])
                    self.monotonic_count('system.linux.context_switches', ctxt_count, tags=tags)
                elif line.startswith('processes'):
                    process_count = int(line.split(' ')[1])
                    self.monotonic_count('system.linux.processes_created', process_count, tags=tags)
                elif line.startswith('intr'):
                    interrupts = int(line.split(' ')[1])
                    self.monotonic_count('system.linux.interrupts', interrupts, tags=tags)

        with open(proc_path_map['entropy_info'], 'r') as entropy_info:
            entropy = entropy_info.readline()
            self.gauge('system.entropy.available', float(entropy), tags=tags)

        ps = get_subprocess_output(['ps', '--no-header', '-eo', 'stat'], self.log)
        # One line per process; each line is a string of state flags (see
        # ps(1)).  BUGFIX: iterate the output line-by-line (the original
        # iterated characters of the raw string) and test each individual
        # `flag`, not the whole `state` string.
        for state in ps[0].splitlines():
            for flag in state:
                if flag in PROCESS_STATES:
                    state_counts[PROCESS_STATES[flag]] += 1
                elif flag in PROCESS_PRIOS:
                    prio_counts[PROCESS_PRIOS[flag]] += 1

        for state in state_counts:
            state_tags = list(tags)
            state_tags.append("state:" + state)
            self.gauge('system.processes.states', float(state_counts[state]), state_tags)

        for prio in prio_counts:
            prio_tags = list(tags)
            prio_tags.append("priority:" + prio)
            self.gauge('system.processes.priorities', float(prio_counts[prio]), prio_tags)
예제 #28
0
파일: util.py 프로젝트: rooprob/dd-agent
 def _get_hostname_unix():
     """Return the FQDN reported by `/bin/hostname -f`, or None."""
     try:
         # Ask the system for its fully-qualified name.
         stdout, _, status = get_subprocess_output(['/bin/hostname', '-f'],
                                                   log)
         if status == 0:
             return stdout.strip()
     except Exception:
         # Any failure (missing binary, exec error, ...) -> unknown host.
         return None
예제 #29
0
    def _check_solaris(self, instance):
        """Collect Solaris network metrics via kstat and netstat."""
        # Per-interface counters from the kernel statistics facility.
        try:
            netstat, _, _ = get_subprocess_output(["kstat", "-p", "link:0:"],
                                                  self.log)
            metrics_by_interface = self._parse_solaris_netstat(netstat)
            for interface, metrics in metrics_by_interface.iteritems():
                self._submit_devicemetrics(interface, metrics)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting kstat stats.")

        # TCP protocol counters.  BUGFIX: a missing comma made the two
        # adjacent literals "-P" "tcp" concatenate into the single
        # argument "-Ptcp"; pass them as separate arguments.
        try:
            netstat, _, _ = get_subprocess_output(
                ["netstat", "-s", "-P", "tcp"], self.log)
            self._submit_regexed_values(netstat, SOLARIS_TCP_METRICS)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")
예제 #30
0
파일: disk.py 프로젝트: dadicool/dd-agent
 def collect_metrics_manually(self):
     """Collect disk metrics by parsing `df -k` output directly."""
     df_out, _, _ = get_subprocess_output(self.DF_COMMAND + ["-k"], self.log)
     self.log.debug(df_out)
     for device in self._list_devices(df_out):
         self.log.debug("Passed: {0}".format(device))
         # Tag by filesystem type when configured; name the metric by
         # mount point or device node depending on the use_mount setting.
         fs_tags = [device[1]] if self._tag_by_filesystem else []
         dev_name = device[-1] if self._use_mount else device[0]
         metrics = self._collect_metrics_manually(device)
         for metric_name, value in metrics.iteritems():
             self.gauge(metric_name, value, tags=fs_tags, device_name=dev_name)
예제 #31
0
    def check(self, instance):
        """Collect Linux system metrics from fixed /proc paths and `ps`.

        Emits inode usage, context switches, processes created,
        interrupts, available entropy, and per-state / per-priority
        process counts.
        """
        tags = instance.get('tags', [])

        state_counts = defaultdict(int)

        prio_counts = defaultdict(int)

        with open('/proc/sys/fs/inode-nr', 'r') as inode_info:
            inode_stats = inode_info.readline().split()
            self.gauge('system.inodes.total', float(inode_stats[0]), tags=tags)
            self.gauge('system.inodes.used', float(inode_stats[1]), tags=tags)

        with open('/proc/stat', 'r') as stat_info:
            lines = [line.strip() for line in stat_info.readlines()]

            for line in lines:
                if line.startswith('ctxt'):
                    ctxt_count = float(line.split(' ')[1])
                    self.monotonic_count('system.linux.context_switches',
                                         ctxt_count,
                                         tags=tags)
                elif line.startswith('processes'):
                    process_count = int(line.split(' ')[1])
                    self.monotonic_count('system.linux.processes_created',
                                         process_count,
                                         tags=tags)
                elif line.startswith('intr'):
                    interrupts = int(line.split(' ')[1])
                    self.monotonic_count('system.linux.interrupts',
                                         interrupts,
                                         tags=tags)

        with open('/proc/sys/kernel/random/entropy_avail') as entropy_info:
            entropy = entropy_info.readline()
            self.gauge('system.entropy.available', float(entropy), tags=tags)

        ps = get_subprocess_output(['ps', '--no-header', '-eo', 'stat'],
                                   self.log)
        # Each process state is a flag in a list of characters. See ps(1).
        # BUGFIX: iterate the output line-by-line (not char-by-char) and
        # test each individual `flag` against the lookup tables; the
        # original tested the whole `state` string instead.
        for state in ps[0].splitlines():
            for flag in state:
                if flag in PROCESS_STATES:
                    state_counts[PROCESS_STATES[flag]] += 1
                elif flag in PROCESS_PRIOS:
                    prio_counts[PROCESS_PRIOS[flag]] += 1

        for state in state_counts:
            state_tags = list(tags)
            state_tags.append("state:" + state)
            self.gauge('system.processes.states', float(state_counts[state]),
                       state_tags)

        for prio in prio_counts:
            prio_tags = list(tags)
            prio_tags.append("priority:" + prio)
            self.gauge('system.processes.priorities', float(prio_counts[prio]),
                       prio_tags)
예제 #32
0
 def collect_metrics_manually(self):
     """Fallback disk metric collection that shells out to `df -k`."""
     output, _, _ = get_subprocess_output(self.DF_COMMAND + ['-k'], self.log)
     self.log.debug(output)
     for device in self._list_devices(output):
         self.log.debug("Passed: {0}".format(device))
         # Optional filesystem tag; metric name keyed on mount or device.
         if self._tag_by_filesystem:
             tags = [device[1]]
         else:
             tags = []
         name = device[-1] if self._use_mount else device[0]
         for metric_name, value in self._collect_metrics_manually(device).iteritems():
             self.gauge(metric_name, value, tags=tags, device_name=name)
예제 #33
0
    def check(self, instance):
        """Run the varnish check: stats via varnishstat, health via varnishadm."""
        # Not configured? Not a problem.
        if instance.get("varnishstat", None) is None:
            raise Exception("varnishstat is not configured")

        # De-duplicate the instance tags; treat a null tag list as empty.
        tags = instance.get('tags', [])
        tags = [] if tags is None else list(set(tags))

        varnishstat_path = instance.get("varnishstat")
        name = instance.get('name')

        # Get version and version-specific args from varnishstat -V.
        version, use_xml = self._get_version_info(varnishstat_path)

        # varnish 3+ emits XML (-x); older versions use the flat -1 form.
        cmd = [varnishstat_path, '-x' if use_xml else '-1']
        if name is None:
            tags += [u'varnish_name:default']
        else:
            cmd.extend(['-n', name])
            tags += [u'varnish_name:%s' % name]

        output, _, _ = get_subprocess_output(cmd, self.log)
        self._parse_varnishstat(output, use_xml, tags)

        # Parse service checks from varnishadm.
        varnishadm_path = instance.get('varnishadm')
        if varnishadm_path:
            # TODO: "debug.health" has been removed since varnish 4.1+. We
            # should check the version and use "backend.list -p" instead.
            secretfile_path = instance.get('secretfile', '/etc/varnish/secret')
            adm_cmd = [
                'sudo', varnishadm_path, '-S', secretfile_path, 'debug.health'
            ]
            adm_output, _, _ = get_subprocess_output(adm_cmd, self.log)
            if adm_output:
                self._parse_varnishadm(adm_output)
예제 #34
0
 def check(self, instance):
     """Report available kernel entropy on Unix systems.

     Reads /proc/sys/kernel/random/entropy_avail (via sudo, tolerating
     empty output) and gauges system.entropy.available.
     """
     #Check the status of Entropy
     if Platform.is_unix():
         try:
             data, _, _ = get_subprocess_output(
                 ['sudo', 'cat', '/proc/sys/kernel/random/entropy_avail'],
                 self.log, False)
             # BUGFIX: the original passed str(data) as a lazy logging arg
             # with no placeholder, so the value was never logged.
             self.log.debug("Entropy Available: %s", str(data))
             self.gauge('system.entropy.available', int(data))
         except Exception as e:
             # BUGFIX: the original format string had no {} placeholder,
             # so the exception text was silently dropped.
             self.log.exception("Failed to collect entropy: {}".format(e))
     else:
         self.log.warning('Plugin currently only available on Linux.')
예제 #35
0
 def _get_proc_list(self):
     """Run `ps` and split its output into lines; False on failure.

     NOTE(review): this variant stops after splitting the output and
     returns nothing on success -- it appears truncated upstream, so
     only the exception handling is touched here.
     """
     # Get output from ps
     try:
         process_exclude_args = self.config.get('exclude_process_args', False)
         if process_exclude_args:
             ps_arg = 'aux'
         else:
             ps_arg = 'auxww'
         output, _, _ = get_subprocess_output(['ps', ps_arg], self.log)
         processLines = output.splitlines()  # Also removes a trailing empty line
     except Exception:
         # BUGFIX: 'except Exception, e' is Python-2-only syntax (and the
         # bound exception was unused); use the portable bare form.
         self.log.exception('Cannot get process list')
         return False
예제 #36
0
파일: config.py 프로젝트: pvfkb/dd-agent
def get_system_stats():
    """Gather basic host facts (arch, platform, CPU count, OS version).

    Returns:
        dict with 'machine', 'platform', 'processor', 'pythonV',
        'cpuCores' (on Linux/macOS/FreeBSD) and one platform-specific
        OS-version key ('nixV', 'macV', 'fbsdV' or 'winV').
    """
    systemStats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    platf = sys.platform

    if Platform.is_linux(platf):
        # One "model name" line per logical CPU in /proc/cpuinfo.
        output, _, _ = get_subprocess_output(
            ['grep', 'model name', '/proc/cpuinfo'], log)
        systemStats['cpuCores'] = len(output.splitlines())
    elif Platform.is_darwin(platf) or Platform.is_freebsd(platf):
        # Both platforms expose the logical CPU count through the same
        # sysctl; the two previously-duplicated branches are merged.
        output, _, _ = get_subprocess_output(['sysctl', 'hw.ncpu'], log)
        systemStats['cpuCores'] = int(output.split(': ')[1])

    if Platform.is_linux(platf):
        systemStats['nixV'] = platform.dist()

    elif Platform.is_darwin(platf):
        systemStats['macV'] = platform.mac_ver()

    elif Platform.is_freebsd(platf):
        version = platform.uname()[2]
        systemStats['fbsdV'] = ('freebsd', version, '')  # no codename for FreeBSD

    elif Platform.is_win32(platf):
        systemStats['winV'] = platform.win32_ver()

    return systemStats
예제 #37
0
 def collect_metrics_manually(self):
     """Collect disk usage metrics by parsing `df -k` ourselves."""
     df_out, _, _ = get_subprocess_output(self.DF_COMMAND + ['-k'], self.log)
     self.log.debug(df_out)
     for device in self._list_devices(df_out):
         self.log.debug("Passed: {0}".format(device))
         if self._tag_by_filesystem:
             tags = [device[1], 'filesystem:{}'.format(device[1])]
         else:
             tags = []
         device_name = device[-1] if self._use_mount else device[0]
         # apply device/mountpoint specific tags
         for regex, device_tags in self._device_tag_re:
             if regex.match(device_name):
                 tags += device_tags
         stats = self._collect_metrics_manually(device)
         for metric_name, value in stats.iteritems():
             self.gauge(metric_name, value, tags=tags,
                        device_name=device_name)
예제 #38
0
    def check(self, instance):
        """Collect varnishstat metrics and varnishadm backend health."""
        # Bail out loudly when the path to varnishstat is missing.
        if instance.get("varnishstat", None) is None:
            raise Exception("varnishstat is not configured")

        tags = instance.get('tags', [])
        tags = [] if tags is None else list(set(tags))

        varnishstat_path = instance.get("varnishstat")
        name = instance.get('name')

        # varnishstat -V tells us the version and which output format to use.
        version, use_xml = self._get_version_info(varnishstat_path)

        cmd = [varnishstat_path, '-x' if use_xml else '-1']
        if name is None:
            tags += [u'varnish_name:default']
        else:
            cmd.extend(['-n', name])
            tags += [u'varnish_name:%s' % name]

        output, _, _ = get_subprocess_output(cmd, self.log)
        self._parse_varnishstat(output, use_xml, tags)

        # Service checks come from varnishadm, which needs the secret file.
        varnishadm_path = instance.get('varnishadm')
        if varnishadm_path:
            secretfile_path = instance.get('secretfile', '/etc/varnish/secret')
            adm_cmd = ['sudo', varnishadm_path, '-S', secretfile_path, 'debug.health']
            adm_output, _, _ = get_subprocess_output(adm_cmd, self.log)
            if adm_output:
                self._parse_varnishadm(adm_output)
예제 #39
0
    def check(self, instance):
        """Gather varnish metrics (varnishstat) and health (varnishadm)."""
        # Not configured? Not a problem.
        if instance.get("varnishstat", None) is None:
            raise Exception("varnishstat is not configured")

        raw_tags = instance.get("tags", [])
        tags = list(set(raw_tags)) if raw_tags is not None else []

        varnishstat_path = instance.get("varnishstat")
        name = instance.get("name")

        # Get version and version-specific args from varnishstat -V.
        version, use_xml = self._get_version_info(varnishstat_path)

        # Build the varnishstat invocation for the detected version.
        cmd = [varnishstat_path, "-x" if use_xml else "-1"]
        if name is not None:
            cmd.extend(["-n", name])
            tags.append(u"varnish_name:%s" % name)
        else:
            tags.append(u"varnish_name:default")

        output, _, _ = get_subprocess_output(cmd, self.log)
        self._parse_varnishstat(output, use_xml, tags)

        # Parse service checks from varnishadm.
        varnishadm_path = instance.get("varnishadm")
        if varnishadm_path:
            secretfile_path = instance.get("secretfile", "/etc/varnish/secret")
            adm_cmd = ["sudo", varnishadm_path, "-S", secretfile_path, "debug.health"]
            adm_output, _, _ = get_subprocess_output(adm_cmd, self.log)
            if adm_output:
                self._parse_varnishadm(adm_output)
예제 #40
0
파일: config.py 프로젝트: jszwedko/dd-agent
def get_system_stats():
    """Build the basic host-facts dict sent with agent metadata."""
    stats = {
        'machine': platform.machine(),
        'platform': sys.platform,
        'processor': platform.processor(),
        'pythonV': platform.python_version(),
    }

    platf = sys.platform

    # Logical CPU count, per platform.
    if Platform.is_linux(platf):
        cpuinfo, _, _ = get_subprocess_output(['grep', 'model name', '/proc/cpuinfo'], log)
        stats['cpuCores'] = len(cpuinfo.splitlines())

    if Platform.is_darwin(platf):
        ncpu, _, _ = get_subprocess_output(['sysctl', 'hw.ncpu'], log)
        stats['cpuCores'] = int(ncpu.split(': ')[1])

    if Platform.is_freebsd(platf):
        ncpu, _, _ = get_subprocess_output(['sysctl', 'hw.ncpu'], log)
        stats['cpuCores'] = int(ncpu.split(': ')[1])

    # OS version, per platform.
    if Platform.is_linux(platf):
        stats['nixV'] = platform.dist()
    elif Platform.is_darwin(platf):
        stats['macV'] = platform.mac_ver()
    elif Platform.is_freebsd(platf):
        stats['fbsdV'] = ('freebsd', platform.uname()[2], '')  # no codename for FreeBSD
    elif Platform.is_win32(platf):
        stats['winV'] = platform.win32_ver()

    return stats
예제 #41
0
파일: collector.py 프로젝트: ross/dd-agent
    def _run_gohai(self, options):
        """Invoke gohai with *options*; return its stdout or None on failure."""
        try:
            output, err, _ = get_subprocess_output(["gohai"] + options, log)
        except OSError as e:
            if e.errno == 2:  # file not found, expected when install from source
                log.info("gohai file not found")
            else:
                log.warning("Unexpected OSError when running gohai %s", e)
            return None
        except Exception as e:
            log.warning("gohai command failed with error %s", e)
            return None
        # gohai writes its own log to stderr; surface it at debug level.
        if err:
            log.debug("GOHAI LOG | %s", err)
        return output
예제 #42
0
파일: collector.py 프로젝트: takus/dd-agent
    def _run_gohai(self, options):
        """Shell out to gohai and hand back its raw output (None on error)."""
        output = None
        try:
            output, err, _ = get_subprocess_output(["gohai"] + options, log)
            if err:
                # Relay gohai's own diagnostics at debug level.
                log.debug("GOHAI LOG | %s", err)
        except Exception as e:
            # OSError gets the detailed treatment; anything else is generic.
            if isinstance(e, OSError):
                if e.errno == 2:  # file not found, expected when install from source
                    log.info("gohai file not found")
                else:
                    log.warning("Unexpected OSError when running gohai %s", e)
            else:
                log.warning("gohai command failed with error %s", e)
        return output
예제 #43
0
파일: config.py 프로젝트: dadicool/dd-agent
def get_system_stats():
    """Collect machine/platform/CPU/OS-version facts into a dict."""
    platf = sys.platform
    systemStats = {
        "machine": platform.machine(),
        "platform": platf,
        "processor": platform.processor(),
        "pythonV": platform.python_version(),
    }

    # Logical CPU count: line count of /proc/cpuinfo models on Linux,
    # hw.ncpu sysctl on the BSD-family platforms.
    if Platform.is_linux(platf):
        model_lines, _, _ = get_subprocess_output(["grep", "model name", "/proc/cpuinfo"], log)
        systemStats["cpuCores"] = len(model_lines.splitlines())

    if Platform.is_darwin(platf):
        out, _, _ = get_subprocess_output(["sysctl", "hw.ncpu"], log)
        systemStats["cpuCores"] = int(out.split(": ")[1])

    if Platform.is_freebsd(platf):
        out, _, _ = get_subprocess_output(["sysctl", "hw.ncpu"], log)
        systemStats["cpuCores"] = int(out.split(": ")[1])

    # Platform-specific OS version.
    if Platform.is_linux(platf):
        systemStats["nixV"] = platform.dist()
    elif Platform.is_darwin(platf):
        systemStats["macV"] = platform.mac_ver()
    elif Platform.is_freebsd(platf):
        systemStats["fbsdV"] = ("freebsd", platform.uname()[2], "")  # no codename for FreeBSD
    elif Platform.is_win32(platf):
        systemStats["winV"] = platform.win32_ver()

    return systemStats
예제 #44
0
파일: ceph.py 프로젝트: 7040210/dd-agent
    def _collect_raw(self, ceph_cmd, instance):
        """Run `ceph version` (optionally via sudo) as a connectivity probe.

        Args:
            ceph_cmd: path to the ceph binary.
            instance: check instance config ('use_sudo' honored).

        Raises:
            Exception: when sudo is requested but unavailable, or when
                the ceph command cannot be executed.
        """
        use_sudo = _is_affirmative(instance.get('use_sudo', False))
        if use_sudo:
            # Verify the agent user can actually sudo before trying.
            test_sudo = os.system('setsid sudo -l < /dev/null')
            if test_sudo != 0:
                raise Exception('The dd-agent user does not have sudo access')
            ceph_args = ['sudo', ceph_cmd]
        else:
            ceph_args = [ceph_cmd]

        args = ceph_args + ['version']
        try:
            output, _, _ = get_subprocess_output(args, self.log)
        except Exception as e:
            # BUGFIX: 'except Exception, e' is Python-2-only syntax; the
            # 'as' form works on Python 2.6+ and Python 3.
            raise Exception('Unable to run cmd=%s: %s' % (' '.join(args), str(e)))
예제 #45
0
    def _run_gohai(self, options):
        """Run gohai unless disabled; return its stdout or None."""
        # Gohai is disabled on Mac for now
        if Platform.is_mac() or not self.agentConfig.get('enable_gohai'):
            return None
        try:
            output, err, _ = get_subprocess_output(["gohai"] + options, log)
        except OSError as e:
            if e.errno == 2:  # file not found, expected when install from source
                log.info("gohai file not found")
            else:
                log.warning("Unexpected OSError when running gohai %s", e)
            return None
        except Exception as e:
            log.warning("gohai command failed with error %s", e)
            return None
        # Forward gohai's stderr diagnostics at debug level.
        if err:
            log.debug("GOHAI LOG | %s", err)
        return output
예제 #46
0
    def _run_gohai(self, options):
        """Run the gohai binary (platform-specific path); return stdout or None.

        gohai's stderr is surfaced as a warning; a missing binary or any
        execution failure yields None.
        """
        output = None
        try:
            if not Platform.is_windows():
                command = "gohai"
            else:
                # BUGFIX: escape the backslash explicitly; "\g" relied on
                # Python leaving unknown escapes untouched, which raises a
                # DeprecationWarning (and eventually an error) on Python 3.
                command = "gohai\\gohai.exe"
            output, err, _ = get_subprocess_output([command] + options, log)
            if err:
                log.warning("GOHAI LOG | {0}".format(err))
        except OSError as e:
            if e.errno == 2:  # file not found, expected when install from source
                log.info("gohai file not found")
            else:
                log.warning("Unexpected OSError when running gohai %s", e)
        except Exception as e:
            log.warning("gohai command failed with error %s", e)

        return output
예제 #47
0
파일: unix.py 프로젝트: motusllc/dd-agent
    def __init__(self, logger):
        """Set up platform-dependent `top` parsing state.

        Args:
            logger: agent logger passed through to the base Check.
        """
        Check.__init__(self, logger)
        macV = None
        macV_minor_version = None
        if sys.platform == 'darwin':
            macV = platform.mac_ver()
            # BUGFIX: mac_ver() does not match '10.x' on macOS 11+; guard
            # the regex so a non-match no longer raises AttributeError.
            m = re.match(r'10\.(\d+)\.?.*', macV[0])
            macV_minor_version = int(m.group(1)) if m else None

        # Output from top is slightly modified on OS X 10.6 (case #28239) and
        # greater; an unparseable (i.e. newer) macOS version gets the modern
        # index too.
        if macV and (macV_minor_version is None or macV_minor_version >= 6):
            self.topIndex = 6
        else:
            self.topIndex = 5

        self.pagesize = 0
        if sys.platform == 'sunos5':
            try:
                pgsz, _, _ = get_subprocess_output(['pagesize'], self.logger)
                self.pagesize = int(pgsz.strip())
            except Exception:
                # No page size available
                pass
예제 #48
0
    def _get_queue_count(self, directory, queues, tags):
        """Gauge postfix.queue.size for each queue under *directory*."""
        for queue in queues:
            queue_path = os.path.join(directory, queue)
            if not os.path.exists(queue_path):
                raise Exception('%s does not exist' % queue_path)

            if os.geteuid() == 0:
                # dd-agent is running as root (not recommended)
                count = sum(len(files) for root, dirs, files in os.walk(queue_path))
            else:
                # can dd-agent user run sudo?
                if os.system('setsid sudo -l < /dev/null') != 0:
                    raise Exception('The dd-agent user does not have sudo access')
                find_out, _, _ = get_subprocess_output(['sudo', 'find', queue_path, '-type', 'f'], self.log, False)
                count = len(find_out.splitlines())

            # emit an individually tagged metric
            queue_tags = tags + ['queue:%s' % queue, 'instance:%s' % os.path.basename(directory)]
            self.gauge('postfix.queue.size', count, tags=queue_tags)
예제 #49
0
파일: unix.py 프로젝트: htgeis/mystore
    def check(self, agentConfig):
        """Return system load averages (raw and per-core normalized).

        Returns a metric dict, or False when load cannot be determined
        on this platform.
        """
        if Platform.is_linux():
            proc_location = agentConfig.get('procfs_path', '/proc').rstrip('/')
            try:
                proc_loadavg = "{0}/loadavg".format(proc_location)
                with open(proc_loadavg, 'r') as load_avg:
                    uptime = load_avg.readline().strip()
            except Exception:
                self.log.exception('Cannot extract load')
                return False
        elif sys.platform in ('darwin', 'sunos5') or sys.platform.startswith("freebsd"):
            # Get output from uptime
            try:
                uptime, _, _ = get_subprocess_output(['uptime'], self.log)
            except Exception:
                self.log.exception('Cannot extract load')
                return False
        else:
            return False

        # Pull out the three load averages (some locales print commas).
        load = [res.replace(',', '.') for res in re.findall(r'([0-9]+[\.,]\d+)', uptime)]
        one, five, fifteen = float(load[0]), float(load[1]), float(load[2])

        try:
            # Normalize load by the core count when it is known; .norm.*
            # names keep the normalized series next to the raw ones.
            cores = int(agentConfig.get('system_stats').get('cpuCores'))
            assert cores >= 1, "Cannot determine number of cores"
            return {'system.load.1': one,
                    'system.load.5': five,
                    'system.load.15': fifteen,
                    'system.load.norm.1': one / cores,
                    'system.load.norm.5': five / cores,
                    'system.load.norm.15': fifteen / cores}
        except Exception:
            # No normalized load available
            return {'system.load.1': one,
                    'system.load.5': five,
                    'system.load.15': fifteen}
예제 #50
0
파일: unix.py 프로젝트: htgeis/mystore
    def check(self, agentConfig):
        """Return the process table as {'processes': [...]}, or False.

        Each entry is the whitespace-split (max 11 fields) and stripped
        columns of one `ps` output line, with the header row removed.
        """
        process_exclude_args = agentConfig.get('exclude_process_args', False)
        if process_exclude_args:
            ps_arg = 'aux'
        else:
            ps_arg = 'auxww'
        # Get output from ps
        try:
            output, _, _ = get_subprocess_output(['ps', ps_arg], self.log)
            processLines = output.splitlines()  # Also removes a trailing empty line
        except Exception:
            # BUGFIX: StandardError is Python-2-only (removed in Python 3);
            # catch Exception as the sibling process checks do.
            self.log.exception('getProcesses')
            return False

        del processLines[0]  # Removes the headers

        # List comprehension instead of map(): identical on Python 2 and
        # still a real list (not a lazy iterator) on Python 3.
        processes = [[field.strip() for field in line.split(None, 10)]
                     for line in processLines]

        return {'processes':   processes}
예제 #51
0
    def _get_proc_list(self):
        """Return the `ps` process table as a list of column lists.

        Returns:
            list of lists of str; each inner list holds up to 11
            stripped columns of one process line (header removed).

        Raises:
            Exception: re-raised when `ps` cannot be executed.
        """
        # Get output from ps
        try:
            process_exclude_args = self.config.get('exclude_process_args', False)
            if process_exclude_args:
                ps_arg = 'aux'
            else:
                ps_arg = 'auxww'
            output, _, _ = get_subprocess_output(['ps', ps_arg], self.log)
            processLines = output.splitlines()  # Also removes a trailing empty line
        except Exception:
            self.log.exception('Cannot get process list')
            raise

        del processLines[0]  # Removes the headers

        # List comprehension instead of map(): identical on Python 2 and
        # still a real list (not a lazy iterator) on Python 3.
        processes = [[field.strip() for field in line.split(None, 10)]
                     for line in processLines]

        return processes
예제 #52
0
    def _get_server_pid(self, db):
        """Best-effort lookup of the running mysqld's PID.

        Tries, in order: the `pid_file` server variable (may fail for
        permission reasons), then scanning `ps` output on Linux.

        Args:
            db: open MySQL connection used to query server variables.

        Returns:
            int PID, or None if it cannot be determined.
        """
        pid = None

        # Try to get pid from pid file, it can fail for permission reason
        pid_file = None
        try:
            cursor = db.cursor()
            cursor.execute("SHOW VARIABLES LIKE 'pid_file'")
            pid_file = cursor.fetchone()[1]
            cursor.close()
            del cursor
        except Exception:
            self.warning("Error while fetching pid_file variable of MySQL.")

        if pid_file is not None:
            self.log.debug("pid file: %s", str(pid_file))
            try:
                # BUGFIX: use a context manager so the file is closed even
                # when int() fails, and also catch ValueError -- a malformed
                # pid file previously leaked the handle and crashed the check.
                with open(pid_file) as f:
                    pid = int(f.readline())
            except (IOError, ValueError):
                self.log.debug("Cannot read mysql pid file %s", pid_file)

        # If pid has not been found, read it from ps
        if pid is None:
            try:
                if sys.platform.startswith("linux"):
                    ps, _, _ = get_subprocess_output(['ps', '-C', 'mysqld', '-o', 'pid'], self.log)
                    pslines = ps.strip().splitlines()
                    # First line is header, second line is mysql pid
                    if len(pslines) == 2:
                        pid = int(pslines[1])
            except Exception:
                self.log.exception("Error while fetching mysql pid from ps")

        return pid
예제 #53
0
    def _check_linux(self, instance):
        """Collect Linux network metrics.

        Gathers connection-state counts (preferring `ss`, falling back to
        `netstat` when `ss` is unavailable), per-interface traffic counters
        from /proc/net/dev, and TCP/UDP protocol totals from /proc/net/snmp.
        """
        if self._collect_cx_state:
            try:
                self.log.debug("Using `ss` to collect connection state")
                # Try using `ss` for increased performance over `netstat`
                for ip_version in ['4', '6']:
                    # Call `ss` for each IP version because there's no built-in way of distinguishing
                    # between the IP versions in the output
                    # BUG FIX: get_subprocess_output returns a (stdout, stderr,
                    # retcode) tuple; the original called .splitlines() on it.
                    output, _, _ = get_subprocess_output(["ss", "-n", "-u", "-t", "-a", "-{0}".format(ip_version)], self.log)
                    lines = output.splitlines()
                    # Netid  State      Recv-Q Send-Q     Local Address:Port       Peer Address:Port
                    # udp    UNCONN     0      0              127.0.0.1:8125                  *:*
                    # udp    ESTAB      0      0              127.0.0.1:37036         127.0.0.1:8125
                    # udp    UNCONN     0      0        fe80::a00:27ff:fe1c:3c4:123          :::*
                    # tcp    TIME-WAIT  0      0          90.56.111.177:56867        46.105.75.4:143
                    # tcp    LISTEN     0      0       ::ffff:127.0.0.1:33217  ::ffff:127.0.0.1:7199
                    # tcp    ESTAB      0      0       ::ffff:127.0.0.1:58975  ::ffff:127.0.0.1:2181

                    metrics = self._parse_linux_cx_state(lines[1:], self.TCP_STATES['ss'], 1, ip_version=ip_version)
                    # Only send the metrics which match the loop iteration's ip version
                    for stat, metric in self.CX_STATE_GAUGE.iteritems():
                        if stat[0].endswith(ip_version):
                            self.gauge(metric, metrics.get(metric))

            except OSError:
                self.log.info("`ss` not found: using `netstat` as a fallback")
                # BUG FIX: unpack the (stdout, stderr, retcode) tuple here too.
                output, _, _ = get_subprocess_output(["netstat", "-n", "-u", "-t", "-a"], self.log)
                lines = output.splitlines()
                # Active Internet connections (w/o servers)
                # Proto Recv-Q Send-Q Local Address           Foreign Address         State
                # tcp        0      0 46.105.75.4:80          79.220.227.193:2032     SYN_RECV
                # tcp        0      0 46.105.75.4:143         90.56.111.177:56867     ESTABLISHED
                # tcp        0      0 46.105.75.4:50468       107.20.207.175:443      TIME_WAIT
                # tcp6       0      0 46.105.75.4:80          93.15.237.188:58038     FIN_WAIT2
                # tcp6       0      0 46.105.75.4:80          79.220.227.193:2029     ESTABLISHED
                # udp        0      0 0.0.0.0:123             0.0.0.0:*
                # udp6       0      0 :::41458                :::*

                metrics = self._parse_linux_cx_state(lines[2:], self.TCP_STATES['netstat'], 5)
                for metric, value in metrics.iteritems():
                    self.gauge(metric, value)

        # Per-interface counters from /proc/net/dev.
        proc = open('/proc/net/dev', 'r')
        try:
            lines = proc.readlines()
        finally:
            proc.close()
        # Inter-|   Receive                                                 |  Transmit
        #  face |bytes     packets errs drop fifo frame compressed multicast|bytes       packets errs drop fifo colls carrier compressed
        #     lo:45890956   112797   0    0    0     0          0         0    45890956   112797    0    0    0     0       0          0
        #   eth0:631947052 1042233   0   19    0   184          0      1206  1208625538  1320529    0    0    0     0       0          0
        #   eth1:       0        0   0    0    0     0          0         0           0        0    0    0    0     0       0          0
        for l in lines[2:]:
            cols = l.split(':', 1)
            x = cols[1].split()
            # Filter inactive interfaces (no bytes received or sent)
            if self._parse_value(x[0]) or self._parse_value(x[8]):
                iface = cols[0].strip()
                metrics = {
                    'bytes_rcvd': self._parse_value(x[0]),
                    'bytes_sent': self._parse_value(x[8]),
                    'packets_in.count': self._parse_value(x[1]),
                    'packets_in.error': self._parse_value(x[2]) + self._parse_value(x[3]),
                    'packets_out.count': self._parse_value(x[9]),
                    'packets_out.error': self._parse_value(x[10]) + self._parse_value(x[11]),
                }
                self._submit_devicemetrics(iface, metrics)

        try:
            proc = open('/proc/net/snmp', 'r')

            # IP:      Forwarding   DefaultTTL InReceives     InHdrErrors  ...
            # IP:      2            64         377145470      0            ...
            # Icmp:    InMsgs       InErrors   InDestUnreachs InTimeExcds  ...
            # Icmp:    1644495      1238       1643257        0            ...
            # IcmpMsg: InType3      OutType3
            # IcmpMsg: 1643257      1643257
            # Tcp:     RtoAlgorithm RtoMin     RtoMax         MaxConn      ...
            # Tcp:     1            200        120000         -1           ...
            # Udp:     InDatagrams  NoPorts    InErrors       OutDatagrams ...
            # Udp:     24249494     1643257    0              25892947     ...
            # UdpLite: InDatagrams  Noports    InErrors       OutDatagrams ...
            # UdpLite: 0            0          0              0            ...
            try:
                lines = proc.readlines()
            finally:
                proc.close()

            tcp_lines = [line for line in lines if line.startswith('Tcp:')]
            udp_lines = [line for line in lines if line.startswith('Udp:')]

            # First matching line is the header, second is the values.
            tcp_column_names = tcp_lines[0].strip().split()
            tcp_values = tcp_lines[1].strip().split()
            tcp_metrics = dict(zip(tcp_column_names, tcp_values))

            udp_column_names = udp_lines[0].strip().split()
            udp_values = udp_lines[1].strip().split()
            udp_metrics = dict(zip(udp_column_names, udp_values))

            # line start indicating what kind of metrics we're looking at
            assert(tcp_metrics['Tcp:'] == 'Tcp:')

            tcp_metrics_name = {
                'RetransSegs': 'system.net.tcp.retrans_segs',
                'InSegs'     : 'system.net.tcp.in_segs',
                'OutSegs'    : 'system.net.tcp.out_segs'
            }

            for key, metric in tcp_metrics_name.iteritems():
                self.rate(metric, self._parse_value(tcp_metrics[key]))

            assert(udp_metrics['Udp:'] == 'Udp:')

            udp_metrics_name = {
                'InDatagrams': 'system.net.udp.in_datagrams',
                'NoPorts': 'system.net.udp.no_ports',
                'InErrors': 'system.net.udp.in_errors',
                'OutDatagrams': 'system.net.udp.out_datagrams',
                'RcvbufErrors': 'system.net.udp.rcv_buf_errors',
                'SndbufErrors': 'system.net.udp.snd_buf_errors'
            }
            for key, metric in udp_metrics_name.iteritems():
                # Not all kernels expose every UDP counter.
                if key in udp_metrics:
                    self.rate(metric, self._parse_value(udp_metrics[key]))

        except IOError:
            # On Openshift, /proc/net/snmp is only readable by root
            self.log.debug("Unable to read /proc/net/snmp.")
예제 #54
0
파일: unix.py 프로젝트: motusllc/dd-agent
    def check(self, agentConfig):
        """Return an aggregate of CPU stats across all CPUs
        When figures are not available, False is sent back.
        """
        def format_results(us, sy, wa, idle, st, guest=None):
            # Drop keys whose value is None so downstream code only sees
            # metrics that were actually collected.
            data = {'cpuUser': us, 'cpuSystem': sy, 'cpuWait': wa, 'cpuIdle': idle, 'cpuStolen': st, 'cpuGuest': guest}
            return dict((k, v) for k, v in data.iteritems() if v is not None)

        def get_value(legend, data, name, filter_value=None):
            "Using the legend and a metric name, get the value or None from the data line"
            if name in legend:
                value = to_float(data[legend.index(name)])
                if filter_value is not None:
                    if value > filter_value:
                        # Value is implausible (e.g. > 110%): discard it.
                        return None
                return value

            else:
                # FIXME return a float or False, would trigger type error if not python
                self.logger.debug("Cannot extract cpu value %s from %s (%s)" % (name, data, legend))
                return 0.0
        try:
            if Platform.is_linux():
                output, _, _ = get_subprocess_output(['mpstat', '1', '3'], self.logger)
                mpstat = output.splitlines()
                # topdog@ip:~$ mpstat 1 3
                # Linux 2.6.32-341-ec2 (ip)   01/19/2012  _x86_64_  (2 CPU)
                #
                # 04:22:41 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest   %idle
                # 04:22:42 PM  all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                # 04:22:43 PM  all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                # 04:22:44 PM  all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                # Average:     all    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
                #
                # OR
                #
                # Thanks to Mart Visser to spotting this one.
                # blah:/etc/dd-agent# mpstat
                # Linux 2.6.26-2-xen-amd64 (atira)  02/17/2012  _x86_64_
                #
                # 05:27:03 PM  CPU    %user   %nice   %sys %iowait    %irq   %soft  %steal  %idle   intr/s
                # 05:27:03 PM  all    3.59    0.00    0.68    0.69    0.00   0.00    0.01   95.03    43.65
                #
                legend = [l for l in mpstat if "%usr" in l or "%user" in l]
                avg = [l for l in mpstat if "Average" in l]
                if len(legend) == 1 and len(avg) == 1:
                    headers = [h for h in legend[0].split() if h not in ("AM", "PM")]
                    data = avg[0].split()

                    # Userland
                    # Debian lenny says %user so we look for both
                    # One of them will be 0
                    cpu_metrics = {
                        "%usr": None, "%user": None, "%nice": None,
                        "%iowait": None, "%idle": None, "%sys": None,
                        "%irq": None, "%soft": None, "%steal": None,
                        "%guest": None
                    }

                    for cpu_m in cpu_metrics:
                        cpu_metrics[cpu_m] = get_value(headers, data, cpu_m, filter_value=110)

                    if any([v is None for v in cpu_metrics.values()]):
                        self.logger.warning("Invalid mpstat data: %s" % data)
                        # BUG FIX: the sums below raised TypeError when any
                        # value was None (filtered out by get_value); treat
                        # missing/implausible columns as 0.0 instead of
                        # losing the whole sample.
                        cpu_metrics = dict(
                            (k, 0.0 if v is None else v)
                            for k, v in cpu_metrics.iteritems())

                    cpu_user = cpu_metrics["%usr"] + cpu_metrics["%user"] + cpu_metrics["%nice"]
                    cpu_system = cpu_metrics["%sys"] + cpu_metrics["%irq"] + cpu_metrics["%soft"]
                    cpu_wait = cpu_metrics["%iowait"]
                    cpu_idle = cpu_metrics["%idle"]
                    cpu_stolen = cpu_metrics["%steal"]
                    cpu_guest = cpu_metrics["%guest"]

                    return format_results(cpu_user,
                                          cpu_system,
                                          cpu_wait,
                                          cpu_idle,
                                          cpu_stolen,
                                          cpu_guest)
                else:
                    return False

            elif sys.platform == 'darwin':
                # generate 3 seconds of data
                # ['          disk0           disk1       cpu     load average', '    KB/t tps  MB/s     KB/t tps  MB/s  us sy id   1m   5m   15m', '   21.23  13  0.27    17.85   7  0.13  14  7 79  1.04 1.27 1.31', '    4.00   3  0.01     5.00   8  0.04  12 10 78  1.04 1.27 1.31', '']
                iostats, _, _ = get_subprocess_output(['iostat', '-C', '-w', '3', '-c', '2'], self.logger)
                lines = [l for l in iostats.splitlines() if len(l) > 0]
                legend = [l for l in lines if "us" in l]
                if len(legend) == 1:
                    headers = legend[0].split()
                    data = lines[-1].split()
                    cpu_user = get_value(headers, data, "us")
                    cpu_sys = get_value(headers, data, "sy")
                    cpu_wait = 0
                    cpu_idle = get_value(headers, data, "id")
                    cpu_st = 0
                    return format_results(cpu_user, cpu_sys, cpu_wait, cpu_idle, cpu_st)
                else:
                    self.logger.warn("Expected to get at least 4 lines of data from iostat instead of just " + str(iostats[:max(80, len(iostats))]))
                    return False

            elif sys.platform.startswith("freebsd"):
                # generate 3 seconds of data
                # tty            ada0              cd0            pass0             cpu
                # tin  tout  KB/t tps  MB/s   KB/t tps  MB/s   KB/t tps  MB/s  us ni sy in id
                # 0    69 26.71   0  0.01   0.00   0  0.00   0.00   0  0.00   2  0  0  1 97
                # 0    78  0.00   0  0.00   0.00   0  0.00   0.00   0  0.00   0  0  0  0 100
                iostats, _, _ = get_subprocess_output(['iostat', '-w', '3', '-c', '2'], self.logger)
                lines = [l for l in iostats.splitlines() if len(l) > 0]
                legend = [l for l in lines if "us" in l]
                if len(legend) == 1:
                    headers = legend[0].split()
                    data = lines[-1].split()
                    cpu_user = get_value(headers, data, "us")
                    cpu_nice = get_value(headers, data, "ni")
                    cpu_sys = get_value(headers, data, "sy")
                    cpu_intr = get_value(headers, data, "in")
                    cpu_wait = 0
                    cpu_idle = get_value(headers, data, "id")
                    cpu_stol = 0
                    return format_results(cpu_user + cpu_nice, cpu_sys + cpu_intr, cpu_wait, cpu_idle, cpu_stol)

                else:
                    self.logger.warn("Expected to get at least 4 lines of data from iostat instead of just " + str(iostats[:max(80, len(iostats))]))
                    return False

            elif sys.platform == 'sunos5':
                # mpstat -aq 1 2
                # SET minf mjf xcal  intr ithr  csw icsw migr smtx  srw syscl  usr sys  wt idl sze
                # 0 5239   0 12857 22969 5523 14628   73  546 4055    1 146856    5   6   0  89  24 <-- since boot
                # 1 ...
                # SET minf mjf xcal  intr ithr  csw icsw migr smtx  srw syscl  usr sys  wt idl sze
                # 0 20374   0 45634 57792 5786 26767   80  876 20036    2 724475   13  13   0  75  24 <-- past 1s
                # 1 ...
                # http://docs.oracle.com/cd/E23824_01/html/821-1462/mpstat-1m.html
                #
                # Will aggregate over all processor sets
                output, _, _ = get_subprocess_output(['mpstat', '-aq', '1', '2'], self.logger)
                mpstat = output.splitlines()
                lines = [l for l in mpstat if len(l) > 0]
                # discard the first len(lines)/2 lines (stats since boot)
                lines = lines[len(lines)/2:]
                legend = [l for l in lines if "SET" in l]
                assert len(legend) == 1
                if len(legend) == 1:
                    headers = legend[0].split()
                    # collect stats for each processor set
                    # and aggregate them based on the relative set size
                    d_lines = [l for l in lines if "SET" not in l]
                    user = [get_value(headers, l.split(), "usr") for l in d_lines]
                    kern = [get_value(headers, l.split(), "sys") for l in d_lines]
                    wait = [get_value(headers, l.split(), "wt") for l in d_lines]
                    idle = [get_value(headers, l.split(), "idl") for l in d_lines]
                    size = [get_value(headers, l.split(), "sze") for l in d_lines]
                    count = sum(size)
                    rel_size = [s/count for s in size]
                    # Weighted sum (dot product) across processor sets.
                    dot = lambda v1, v2: reduce(operator.add, map(operator.mul, v1, v2))
                    return format_results(dot(user, rel_size),
                                          dot(kern, rel_size),
                                          dot(wait, rel_size),
                                          dot(idle, rel_size),
                                          0.0)
            else:
                self.logger.warn("CPUStats: unsupported platform")
                return False
        except Exception:
            self.logger.exception("Cannot compute CPU stats")
            return False
예제 #55
0
파일: unix.py 프로젝트: motusllc/dd-agent
    def check(self, agentConfig):
        """Collect memory and swap metrics for the current platform.

        Returns a dict of values (in MB, plus a few 0-1 ratio fields) or
        False when metrics cannot be collected on this platform.
        """
        if Platform.is_linux():
            proc_location = agentConfig.get('procfs_path', '/proc').rstrip('/')
            try:
                proc_meminfo = "{}/meminfo".format(proc_location)
                with open(proc_meminfo, 'r') as mem_info:
                    lines = mem_info.readlines()
            except Exception:
                self.logger.exception('Cannot get memory metrics from %s', proc_meminfo)
                return False

            # NOTE: not all of the stats below are present on all systems as
            # not all kernel versions report all of them.
            #
            # $ cat /proc/meminfo
            # MemTotal:        7995360 kB
            # MemFree:         1045120 kB
            # MemAvailable:    1253920 kB
            # Buffers:          226284 kB
            # Cached:           775516 kB
            # SwapCached:       248868 kB
            # Active:          1004816 kB
            # Inactive:        1011948 kB
            # Active(anon):     455152 kB
            # Inactive(anon):   584664 kB
            # Active(file):     549664 kB
            # Inactive(file):   427284 kB
            # Unevictable:     4392476 kB
            # Mlocked:         4392476 kB
            # SwapTotal:      11120632 kB
            # SwapFree:       10555044 kB
            # Dirty:              2948 kB
            # Writeback:             0 kB
            # AnonPages:       5203560 kB
            # Mapped:            50520 kB
            # Shmem:             10108 kB
            # Slab:             161300 kB
            # SReclaimable:     136108 kB
            # SUnreclaim:        25192 kB
            # KernelStack:        3160 kB
            # PageTables:        26776 kB
            # NFS_Unstable:          0 kB
            # Bounce:                0 kB
            # WritebackTmp:          0 kB
            # CommitLimit:    15118312 kB
            # Committed_AS:    6703508 kB
            # VmallocTotal:   34359738367 kB
            # VmallocUsed:      400668 kB
            # VmallocChunk:   34359329524 kB
            # HardwareCorrupted:     0 kB
            # HugePages_Total:       0
            # HugePages_Free:        0
            # HugePages_Rsvd:        0
            # HugePages_Surp:        0
            # Hugepagesize:       2048 kB
            # DirectMap4k:       10112 kB
            # DirectMap2M:     8243200 kB

            regexp = re.compile(r'^(\w+):\s+([0-9]+)')  # We run this several times so one-time compile now
            meminfo = {}

            parse_error = False
            for line in lines:
                try:
                    match = re.search(regexp, line)
                    if match is not None:
                        meminfo[match.group(1)] = match.group(2)
                except Exception:
                    parse_error = True
            if parse_error:
                self.logger.error("Error parsing %s", proc_meminfo)

            memData = {}

            # Physical memory
            # FIXME units are in MB, we should use bytes instead
            try:
                memData['physTotal'] = int(meminfo.get('MemTotal', 0)) / 1024
                memData['physFree'] = int(meminfo.get('MemFree', 0)) / 1024
                memData['physBuffers'] = int(meminfo.get('Buffers', 0)) / 1024
                memData['physCached'] = int(meminfo.get('Cached', 0)) / 1024
                memData['physShared'] = int(meminfo.get('Shmem', 0)) / 1024
                memData['physSlab'] = int(meminfo.get('Slab', 0)) / 1024
                memData['physPageTables'] = int(meminfo.get('PageTables', 0)) / 1024
                memData['physUsed'] = memData['physTotal'] - memData['physFree']

                if 'MemAvailable' in meminfo:
                    memData['physUsable'] = int(meminfo.get('MemAvailable', 0)) / 1024
                else:
                    # Usable is relative since cached and buffers are actually used to speed things up.
                    memData['physUsable'] = memData['physFree'] + memData['physBuffers'] + memData['physCached']

                if memData['physTotal'] > 0:
                    memData['physPctUsable'] = float(memData['physUsable']) / float(memData['physTotal'])
            except Exception:
                self.logger.exception('Cannot compute stats from %s', proc_meminfo)

            # Swap
            # FIXME units are in MB, we should use bytes instead
            try:
                memData['swapTotal'] = int(meminfo.get('SwapTotal', 0)) / 1024
                memData['swapFree'] = int(meminfo.get('SwapFree', 0)) / 1024
                memData['swapCached'] = int(meminfo.get('SwapCached', 0)) / 1024

                memData['swapUsed'] = memData['swapTotal'] - memData['swapFree']

                if memData['swapTotal'] > 0:
                    memData['swapPctFree'] = float(memData['swapFree']) / float(memData['swapTotal'])
            except Exception:
                self.logger.exception('Cannot compute swap stats')

            return memData

        elif sys.platform == 'darwin':
            if psutil is None:
                self.logger.error("psutil must be installed on MacOS to collect memory metrics")
                return False

            phys_memory = psutil.virtual_memory()
            swap = psutil.swap_memory()
            return {'physUsed': phys_memory.used / float(1024**2),
                'physFree': phys_memory.free / float(1024**2),
                'physUsable': phys_memory.available / float(1024**2),
                'physPctUsable': (100 - phys_memory.percent) / 100.0,
                'swapUsed': swap.used / float(1024**2),
                'swapFree': swap.free / float(1024**2)}

        elif sys.platform.startswith("freebsd"):
            try:
                output, _, _ = get_subprocess_output(['sysctl', 'vm.stats.vm'], self.logger)
                sysctl = output.splitlines()
            except Exception:
                self.logger.exception('getMemoryUsage')
                return False

            # ...
            # vm.stats.vm.v_page_size: 4096
            # vm.stats.vm.v_page_count: 759884
            # vm.stats.vm.v_wire_count: 122726
            # vm.stats.vm.v_active_count: 109350
            # vm.stats.vm.v_cache_count: 17437
            # vm.stats.vm.v_inactive_count: 479673
            # vm.stats.vm.v_free_count: 30542
            # ...

            # We run this several times so one-time compile now
            regexp = re.compile(r'^vm\.stats\.vm\.(\w+):\s+([0-9]+)')
            meminfo = {}

            parse_error = False
            for line in sysctl:
                try:
                    match = re.search(regexp, line)
                    if match is not None:
                        meminfo[match.group(1)] = match.group(2)
                except Exception:
                    parse_error = True
            if parse_error:
                self.logger.error("Error parsing vm.stats.vm output: %s", sysctl)

            memData = {}

            # Physical memory
            try:
                pageSize = int(meminfo.get('v_page_size'))

                memData['physTotal'] = (int(meminfo.get('v_page_count', 0))
                                        * pageSize) / 1048576
                memData['physFree'] = (int(meminfo.get('v_free_count', 0))
                                       * pageSize) / 1048576
                memData['physCached'] = (int(meminfo.get('v_cache_count', 0))
                                         * pageSize) / 1048576
                # BUG FIX: the default 0 was previously passed as the *base*
                # argument of int() -- int(get('v_active_count'), 0) -- which
                # raised TypeError whenever the key was missing.
                memData['physUsed'] = ((int(meminfo.get('v_active_count', 0)) +
                                        int(meminfo.get('v_wire_count', 0)))
                                       * pageSize) / 1048576
                memData['physUsable'] = ((int(meminfo.get('v_free_count', 0)) +
                                          int(meminfo.get('v_cache_count', 0)) +
                                          int(meminfo.get('v_inactive_count', 0))) *
                                         pageSize) / 1048576

                if memData['physTotal'] > 0:
                    memData['physPctUsable'] = float(memData['physUsable']) / float(memData['physTotal'])
            except Exception:
                # BUG FIX: this handler referenced proc_meminfo, which is only
                # defined in the Linux branch, and raised NameError here.
                self.logger.exception('Cannot compute stats from vm.stats.vm')

            # Swap
            try:
                output, _, _ = get_subprocess_output(['swapinfo', '-m'], self.logger)
                sysctl = output.splitlines()
            except Exception:
                self.logger.exception('getMemoryUsage')
                return False

            # ...
            # Device          1M-blocks     Used    Avail Capacity
            # /dev/ad0s1b           570        0      570     0%
            # ...

            assert "Device" in sysctl[0]

            try:
                memData['swapTotal'] = 0
                memData['swapFree'] = 0
                memData['swapUsed'] = 0
                for line in sysctl[1:]:
                    if len(line) > 0:
                        line = line.split()
                        memData['swapTotal'] += int(line[1])
                        memData['swapFree'] += int(line[3])
                        memData['swapUsed'] += int(line[2])
            except Exception:
                self.logger.exception('Cannot compute stats from swapinfo')

            return memData
        elif sys.platform == 'sunos5':
            try:
                memData = {}
                cmd = ["kstat", "-m", "memory_cap", "-c", "zone_memory_cap", "-p"]
                output, _, _ = get_subprocess_output(cmd, self.logger)
                kmem = output.splitlines()

                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:anon_alloc_fail   0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:anonpgin  0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:class     zone_memory_cap
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:crtime    16359935.0680834
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:execpgin  185
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:fspgin    2556
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:n_pf_throttle     0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:n_pf_throttle_usec        0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:nover     0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:pagedout  0
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:pgpgin    2741
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:physcap   536870912  <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:rss       115544064  <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:snaptime  16787393.9439095
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swap      91828224   <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:swapcap   1073741824 <--
                # memory_cap:360:53aa9b7e-48ba-4152-a52b-a6368c:zonename  53aa9b7e-48ba-4152-a52b-a6368c3d9e7c

                # turn memory_cap:360:zone_name:key value
                # into { "key": value, ...}
                kv = [l.strip().split() for l in kmem if len(l) > 0]
                entries = dict([(k.split(":")[-1], v) for (k, v) in kv])
                # extract rss, physcap, swap, swapcap, turn into MB
                convert = lambda v: int(long(v))/2**20
                memData["physTotal"] = convert(entries["physcap"])
                memData["physUsed"] = convert(entries["rss"])
                memData["physFree"] = memData["physTotal"] - memData["physUsed"]
                memData["swapTotal"] = convert(entries["swapcap"])
                memData["swapUsed"] = convert(entries["swap"])
                memData["swapFree"] = memData["swapTotal"] - memData["swapUsed"]

                if memData['swapTotal'] > 0:
                    memData['swapPctFree'] = float(memData['swapFree']) / float(memData['swapTotal'])
                return memData
            except Exception:
                self.logger.exception("Cannot compute mem stats from kstat -c zone_memory_cap")
                return False
        else:
            return False
예제 #56
0
파일: unix.py 프로젝트: motusllc/dd-agent
    def check(self, agentConfig):
        """Capture io stats.

        @rtype dict
        @return {"device": {"metric": value, "metric": value}, ...}
        """
        io = {}
        try:
            if Platform.is_linux():
                stdout, _, _ = get_subprocess_output(['iostat', '-d', '1', '2', '-x', '-k'], self.logger)

                #                 Linux 2.6.32-343-ec2 (ip-10-35-95-10)   12/11/2012      _x86_64_        (2 CPU)
                #
                # Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await  svctm  %util
                # sda1              0.00    17.61    0.26   32.63     4.23   201.04    12.48     0.16    4.81   0.53   1.73
                # sdb               0.00     2.68    0.19    3.84     5.79    26.07    15.82     0.02    4.93   0.22   0.09
                # sdg               0.00     0.13    2.29    3.84   100.53    30.61    42.78     0.05    8.41   0.88   0.54
                # sdf               0.00     0.13    2.30    3.84   100.54    30.61    42.78     0.06    9.12   0.90   0.55
                # md0               0.00     0.00    0.05    3.37     1.41    30.01    18.35     0.00    0.00   0.00   0.00
                #
                # Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await  svctm  %util
                # sda1              0.00     0.00    0.00   10.89     0.00    43.56     8.00     0.03    2.73   2.73   2.97
                # sdb               0.00     0.00    0.00    2.97     0.00    11.88     8.00     0.00    0.00   0.00   0.00
                # sdg               0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
                # sdf               0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
                # md0               0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
                io.update(self._parse_linux2(stdout))

            elif sys.platform == "sunos5":
                output, _, _ = get_subprocess_output(["iostat", "-x", "-d", "1", "2"], self.logger)
                iostat = output.splitlines()

                #                   extended device statistics <-- since boot
                # device      r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b
                # ramdisk1    0.0    0.0    0.1    0.1  0.0  0.0    0.0   0   0
                # sd0         0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0
                # sd1        79.9  149.9 1237.6 6737.9  0.0  0.5    2.3   0  11
                #                   extended device statistics <-- past second
                # device      r/s    w/s   kr/s   kw/s wait actv  svc_t  %w  %b
                # ramdisk1    0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0
                # sd0         0.0    0.0    0.0    0.0  0.0  0.0    0.0   0   0
                # sd1         0.0  139.0    0.0 1850.6  0.0  0.0    0.1   0   1

                # discard the first half of the display (stats since boot)
                lines = [l for l in iostat if len(l) > 0]
                lines = lines[len(lines)/2:]

                assert "extended device statistics" in lines[0]
                headers = lines[1].split()
                assert "device" in headers
                for l in lines[2:]:
                    cols = l.split()
                    # cols[0] is the device
                    # cols[1:] are the values
                    io[cols[0]] = {}
                    for i in range(1, len(cols)):
                        io[cols[0]][self.xlate(headers[i], "sunos")] = cols[i]

            elif sys.platform.startswith("freebsd"):
                output, _, _ = get_subprocess_output(["iostat", "-x", "-d", "1", "2"], self.logger)
                iostat = output.splitlines()

                # Be careful!
                # It looks like SunOS, but some columms (wait, svc_t) have different meaning
                #                        extended device statistics
                # device     r/s   w/s    kr/s    kw/s wait svc_t  %b
                # ad0        3.1   1.3    49.9    18.8    0   0.7   0
                #                         extended device statistics
                # device     r/s   w/s    kr/s    kw/s wait svc_t  %b
                # ad0        0.0   2.0     0.0    31.8    0   0.2   0

                # discard the first half of the display (stats since boot)
                lines = [l for l in iostat if len(l) > 0]
                lines = lines[len(lines)/2:]

                assert "extended device statistics" in lines[0]
                headers = lines[1].split()
                assert "device" in headers
                for l in lines[2:]:
                    cols = l.split()
                    # cols[0] is the device
                    # cols[1:] are the values
                    io[cols[0]] = {}
                    for i in range(1, len(cols)):
                        io[cols[0]][self.xlate(headers[i], "freebsd")] = cols[i]
            elif sys.platform == 'darwin':
                iostat, _, _ = get_subprocess_output(['iostat', '-d', '-c', '2', '-w', '1'], self.logger)
                #          disk0           disk1          <-- number of disks
                #    KB/t tps  MB/s     KB/t tps  MB/s
                #   21.11  23  0.47    20.01   0  0.00
                #    6.67   3  0.02     0.00   0  0.00    <-- line of interest
                io = self._parse_darwin(iostat)
            else:
                return False

            # If we filter devices, do it know.
            device_blacklist_re = agentConfig.get('device_blacklist_re', None)
            if device_blacklist_re:
                filtered_io = {}
                for device, stats in io.iteritems():
                    if not device_blacklist_re.match(device):
                        filtered_io[device] = stats
            else:
                filtered_io = io
            return filtered_io

        except Exception:
            self.logger.exception("Cannot extract IO statistics")
            return False
Example #57
0
File: network.py  Project: ross/dd-agent
    def _check_bsd(self, instance):
        """Collect network metrics on BSD/OSX by shelling out to netstat.

        Two passes:
          1. ``netstat -i -b`` for per-interface packet/byte/error counters,
             submitted through ``self._submit_devicemetrics``.
          2. ``netstat -s -p tcp`` for protocol-level TCP counters, matched
             against ``BSD_TCP_METRICS`` regexes.
        """
        netstat_flags = ['-i', '-b']

        # FreeBSD's netstat truncates device names unless you pass '-W'
        if Platform.is_freebsd():
            netstat_flags.append('-W')

        try:
            output, _, _ = get_subprocess_output(["netstat"] + netstat_flags, self.log)
            lines = output.splitlines()
            # Name  Mtu   Network       Address            Ipkts Ierrs     Ibytes    Opkts Oerrs     Obytes  Coll
            # lo0   16384 <Link#1>                        318258     0  428252203   318258     0  428252203     0
            # lo0   16384 localhost   fe80:1::1           318258     -  428252203   318258     -  428252203     -
            # lo0   16384 127           localhost         318258     -  428252203   318258     -  428252203     -
            # lo0   16384 localhost   ::1                 318258     -  428252203   318258     -  428252203     -
            # gif0* 1280  <Link#2>                             0     0          0        0     0          0     0
            # stf0* 1280  <Link#3>                             0     0          0        0     0          0     0
            # en0   1500  <Link#4>    04:0c:ce:db:4e:fa 20801309     0 13835457425 15149389     0 11508790198     0
            # en0   1500  seneca.loca fe80:4::60c:ceff: 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  192.168.1     192.168.1.63    20801309     - 13835457425 15149389     - 11508790198     -
            # en0   1500  2001:470:1f 2001:470:1f07:11d 20801309     - 13835457425 15149389     - 11508790198     -
            # p2p0  2304  <Link#5>    06:0c:ce:db:4e:fa        0     0          0        0     0          0     0
            # ham0  1404  <Link#6>    7a:79:05:4d:bf:f5    30100     0    6815204    18742     0    8494811     0
            # ham0  1404  5             5.77.191.245       30100     -    6815204    18742     -    8494811     -
            # ham0  1404  seneca.loca fe80:6::7879:5ff:    30100     -    6815204    18742     -    8494811     -
            # ham0  1404  2620:9b::54 2620:9b::54d:bff5    30100     -    6815204    18742     -    8494811     -

            headers = lines[0].split()

            # Given the irregular structure of the table above, better to parse from the end of each line
            # Verify headers first
            #          -7       -6       -5        -4       -3       -2        -1
            for h in ("Ipkts", "Ierrs", "Ibytes", "Opkts", "Oerrs", "Obytes", "Coll"):
                if h not in headers:
                    # Fixed: this used `self.logger`, which does not exist on
                    # this class -- every other call site uses `self.log`.
                    self.log.error("%s not found in %s; cannot parse", h, headers)
                    return False

            current = None
            for l in lines[1:]:
                # Another header row, abort now, this is IPv6 land
                if "Name" in l:
                    break

                x = l.split()
                if len(x) == 0:
                    break

                iface = x[0]
                if iface.endswith("*"):
                    # '*' marks an interface that is down; strip the marker
                    iface = iface[:-1]
                if iface == current:
                    # skip multiple lines of same interface
                    continue
                else:
                    current = iface

                # Filter inactive interfaces (no bytes received or sent)
                if self._parse_value(x[-5]) or self._parse_value(x[-2]):
                    metrics = {
                        'bytes_rcvd': self._parse_value(x[-5]),
                        'bytes_sent': self._parse_value(x[-2]),
                        'packets_in.count': self._parse_value(x[-7]),
                        'packets_in.error': self._parse_value(x[-6]),
                        'packets_out.count': self._parse_value(x[-4]),
                        'packets_out.error': self._parse_value(x[-3]),
                    }
                    self._submit_devicemetrics(iface, metrics)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting connection stats.")

        try:
            # Fixed: a missing comma ("-p" "tcp") made Python concatenate the
            # strings into the single argument '-ptcp'; pass them separately
            # as netstat(1) documents.
            netstat, _, _ = get_subprocess_output(["netstat", "-s", "-p", "tcp"], self.log)
            #3651535 packets sent
            #        972097 data packets (615753248 bytes)
            #        5009 data packets (2832232 bytes) retransmitted
            #        0 resends initiated by MTU discovery
            #        2086952 ack-only packets (471 delayed)
            #        0 URG only packets
            #        0 window probe packets
            #        310851 window update packets
            #        336829 control packets
            #        0 data packets sent after flow control
            #        3058232 checksummed in software
            #        3058232 segments (571218834 bytes) over IPv4
            #        0 segments (0 bytes) over IPv6
            #4807551 packets received
            #        1143534 acks (for 616095538 bytes)
            #        165400 duplicate acks
            #        ...

            self._submit_regexed_values(netstat, BSD_TCP_METRICS)
        except SubprocessOutputEmptyError:
            self.log.exception("Error collecting TCP stats.")
Example #58
0
    def _populate_payload_metadata(self, payload, check_statuses, start_event=True):
        """
        Periodically populate the payload with metadata related to the system, host, and/or checks.
        """
        now = time.time()

        # Include system stats on first postback
        if start_event and self._is_first_run():
            payload['systemStats'] = self.agentConfig.get('system_stats', {})
            # Also post an event in the newsfeed
            payload['events']['System'] = [{
                'api_key': self.agentConfig['api_key'],
                'host': payload['internalHostname'],
                'timestamp': now,
                'event_type':'Agent Startup',
                'msg_text': 'Version %s' % get_version()
            }]

        # Periodically send the host metadata.
        if self._should_send_additional_data('host_metadata'):
            # gather metadata with gohai
            try:
                if not Platform.is_windows():
                    command = "gohai"
                else:
                    command = "gohai\gohai.exe"
                gohai_metadata, gohai_err, _ = get_subprocess_output([command], log)
                payload['gohai'] = gohai_metadata
                if gohai_err:
                    log.warning("GOHAI LOG | {0}".format(gohai_err))
            except OSError as e:
                if e.errno == 2:  # file not found, expected when install from source
                    log.info("gohai file not found")
                else:
                    raise e
            except Exception as e:
                log.warning("gohai command failed with error %s" % str(e))

            payload['systemStats'] = get_system_stats()
            payload['meta'] = self._get_hostname_metadata()

            self.hostname_metadata_cache = payload['meta']
            # Add static tags from the configuration file
            host_tags = []
            if self.agentConfig['tags'] is not None:
                host_tags.extend([unicode(tag.strip())
                                 for tag in self.agentConfig['tags'].split(",")])

            if self.agentConfig['collect_ec2_tags']:
                host_tags.extend(EC2.get_tags(self.agentConfig))

            if host_tags:
                payload['host-tags']['system'] = host_tags

            GCE_tags = GCE.get_tags(self.agentConfig)
            if GCE_tags is not None:
                payload['host-tags'][GCE.SOURCE_TYPE_NAME] = GCE_tags

            # Log the metadata on the first run
            if self._is_first_run():
                log.info("Hostnames: %s, tags: %s" %
                         (repr(self.hostname_metadata_cache), payload['host-tags']))

        # Periodically send extra hosts metadata (vsphere)
        # Metadata of hosts that are not the host where the agent runs, not all the checks use
        # that
        external_host_tags = []
        if self._should_send_additional_data('external_host_tags'):
            for check in self.initialized_checks_d:
                try:
                    getter = getattr(check, 'get_external_host_tags')
                    check_tags = getter()
                    external_host_tags.extend(check_tags)
                except AttributeError:
                    pass

        if external_host_tags:
            payload['external_host_tags'] = external_host_tags

        # Periodically send agent_checks metadata
        if self._should_send_additional_data('agent_checks'):
            # Add agent checks statuses and error/warning messages
            agent_checks = []
            for check in check_statuses:
                if check.instance_statuses is not None:
                    for i, instance_status in enumerate(check.instance_statuses):
                        agent_checks.append(
                            (
                                check.name, check.source_type_name,
                                instance_status.instance_id,
                                instance_status.status,
                                # put error message or list of warning messages in the same field
                                # it will be handled by the UI
                                instance_status.error or instance_status.warnings or "",
                                check.service_metadata[i]
                            )
                        )
                else:
                    agent_checks.append(
                        (
                            check.name, check.source_type_name,
                            "initialization",
                            check.status, repr(check.init_failed_error)
                        )
                    )
            payload['agent_checks'] = agent_checks
            payload['meta'] = self.hostname_metadata_cache  # add hostname metadata

        # If required by the user, let's create the dd_check:xxx host tags
        if self.agentConfig['create_dd_check_tags'] and \
                self._should_send_additional_data('dd_check_tags'):
            app_tags_list = [DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d]
            app_tags_list.extend([DD_CHECK_TAG.format(cname) for cname
                                  in JMXFiles.get_jmx_appnames()])

            if 'system' not in payload['host-tags']:
                payload['host-tags']['system'] = []

            payload['host-tags']['system'].extend(app_tags_list)