def add_stack_traces(self, app_name, key_txn, txn_type, stack_traces):
        """Adds a list of stack_traces to a praticular xray profile session's
        call tree. This is called at the end of a transaction when it's name is
        frozen and it is identified as an xray transaction.

        """

        try:
            xray_profile_sessions = self.application_xrays[app_name]
            xps = xray_profile_sessions[key_txn]
        except KeyError:
            # Invalid xray_session name
            return False

        count = 0

        with self._lock:
            start = time.time()

            for stack_trace in stack_traces:
                count += len(stack_trace)
                xps.update_call_tree(txn_type, stack_trace)
                xps.sample_count += 1

            end = time.time() - start
            self._aggregation_time += end

        internal_metric('Supportability/Python/Profiling/Counts/stack_frames',
                count)

        return True
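
A minimal self-contained sketch of the pattern above (mutate shared profiling
state under a lock while accumulating the time spent aggregating), using
illustrative class and attribute names rather than the agent's:

import threading
import time

class ProfileAggregator:
    """Toy stand-in for an xray profile session's aggregation step."""

    def __init__(self):
        self._lock = threading.Lock()
        self.sample_count = 0
        self.frame_count = 0
        self.aggregation_time = 0.0

    def add_stack_traces(self, stack_traces):
        with self._lock:
            start = time.time()
            for stack_trace in stack_traces:
                # Each stack trace is a sequence of frames; count them all.
                self.frame_count += len(stack_trace)
                self.sample_count += 1
            self.aggregation_time += time.time() - start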
Example #2
    def record_transaction(self, data):
        """Record a single transaction against this application."""

        if not self._active_session:
            return

        if self._stats_engine.settings is None:
            return

        # Check whether we are trying to record a transaction in a
        # different process to the one the application was activated in.

        self.validate_process()

        internal_metrics = ValueMetrics()

        with InternalTraceContext(internal_metrics):
            try:
                # We accumulate stats into a workarea and only then merge it
                # into the main one under a thread lock. We do this to ensure
                # that the process of generating the metrics into the stats
                # doesn't unnecessarily lock out another thread.

                stats = self._stats_engine.create_workarea()
                stats.record_transaction(data)

            except Exception:
                _logger.exception('The generation of transaction data has '
                        'failed. This would indicate some sort of internal '
                        'implementation issue with the agent. Please report '
                        'this problem to New Relic support for further '
                        'investigation.')

            with self._stats_lock:
                try:
                    self._transaction_count += 1
                    self._last_transaction = data.end_time

                    internal_metric('Supportability/Transaction/Counts/'
                            'metric_data', stats.metric_data_count())

                    self._stats_engine.merge_metric_stats(stats)
                    self._stats_engine.merge_other_stats(stats)

                    # We merge the internal statistics here as well even
                    # though we have popped out of the context where we are
                    # recording. This is okay so long as we don't record
                    # anything else after this point. If we do, then that
                    # data will not be recorded.

                    self._stats_engine.merge_value_metrics(
                            internal_metrics.metrics())

                except Exception:
                    _logger.exception('The merging of transaction data has '
                            'failed. This would indicate some sort of '
                            'internal implementation issue with the agent. '
                            'Please report this problem to New Relic support '
                            'for further investigation.')
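
The workarea pattern used above, where metrics are generated into a private
scratch object without holding the lock and only the finished result is merged
under the lock, can be sketched on its own like this (the names are
illustrative, not the agent's API):

import threading

class TinyStatsEngine:
    def __init__(self):
        self._lock = threading.Lock()
        self._metrics = {}

    def create_workarea(self):
        # A fresh private accumulator; no locking needed while filling it in.
        return {}

    def merge(self, workarea):
        # Only the merge of the finished workarea happens under the lock.
        with self._lock:
            for name, value in workarea.items():
                self._metrics[name] = self._metrics.get(name, 0) + value

engine = TinyStatsEngine()
workarea = engine.create_workarea()
workarea['Transaction/Count'] = workarea.get('Transaction/Count', 0) + 1
engine.merge(workarea)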
Example #3
    def slow_transaction_data(self):
        """Returns a list containing any slow transaction data collected
        during the reporting period.

        NOTE Currently only the slowest transaction for the reporting
        period is retained.

        """

        if not self.__settings:
            return []

        if not self.__slow_transaction:
            return []

        maximum = self.__settings.agent_limits.transaction_traces_nodes

        transaction_trace = self.__slow_transaction.transaction_trace(
                self, maximum)

        internal_metric('Supportability/StatsEngine/Counts/'
                'transaction_sample_data',
                self.__slow_transaction.trace_node_count)

        data = [transaction_trace,
                list(self.__slow_transaction.string_table.values())]

        if self.__settings.debug.log_transaction_trace_payload:
            _logger.debug('Encoding slow transaction data where '
                    'payload=%r.', data)

        with InternalTrace('Supportability/StatsEngine/JSON/Encode/'
                'transaction_sample_data'):

            json_data = json_encode(data)

        internal_metric('Supportability/StatsEngine/ZLIB/Bytes/'
                'transaction_sample_data', len(json_data))

        with InternalTrace('Supportability/StatsEngine/ZLIB/Compress/'
                'transaction_sample_data'):
            zlib_data = zlib.compress(six.b(json_data))

        with InternalTrace('Supportability/StatsEngine/BASE64/Encode/'
                'transaction_sample_data'):
            pack_data = base64.standard_b64encode(zlib_data)

            if six.PY3:
                pack_data = pack_data.decode('Latin-1')

        root = transaction_trace.root

        trace_data = [[root.start_time,
                root.end_time - root.start_time,
                self.__slow_transaction.path,
                self.__slow_transaction.request_uri,
                pack_data]]

        return trace_data
    def slow_transaction_data(self):
        """Returns a list containing any slow transaction data collected
        during the reporting period.

        NOTE Currently only the slowest transaction for the reporting
        period is retained.

        """

        if not self.__settings:
            return []

        if not self.__slow_transaction:
            return []

        maximum = self.__settings.agent_limits.transaction_traces_nodes

        transaction_trace = self.__slow_transaction.transaction_trace(
                self, maximum)

        internal_metric('Supportability/StatsEngine/Counts/'
                'transaction_sample_data',
                self.__slow_transaction.trace_node_count)

        data = [transaction_trace,
                list(self.__slow_transaction.string_table.values())]

        if self.__settings.debug.log_transaction_trace_payload:
            _logger.debug('Encoding slow transaction data where '
                    'payload=%r.', data)

        with InternalTrace('Supportability/StatsEngine/JSON/Encode/'
                'transaction_sample_data'):

            json_data = simplejson.dumps(data, ensure_ascii=True,
                    encoding='Latin-1', namedtuple_as_object=False,
                    default=lambda o: list(iter(o)))

        internal_metric('Supportability/StatsEngine/ZLIB/Bytes/'
                'transaction_sample_data', len(json_data))

        with InternalTrace('Supportability/StatsEngine/ZLIB/Compress/'
                'transaction_sample_data'):
            zlib_data = zlib.compress(six.b(json_data))

        with InternalTrace('Supportability/StatsEngine/BASE64/Encode/'
                'transaction_sample_data'):
            pack_data = base64.standard_b64encode(zlib_data)

        root = transaction_trace.root

        trace_data = [[root.start_time,
                root.end_time - root.start_time,
                self.__slow_transaction.path,
                self.__slow_transaction.request_uri,
                pack_data]]

        return trace_data
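
The payload packing in both versions above is the same three-step pipeline:
JSON-encode the trace data, zlib-compress it, then base64-encode the result
(decoding the base64 output to text on Python 3). A sketch of that pipeline,
stripped of the agent's tracing wrappers and using the standard library json
module in place of simplejson:

import base64
import json
import sys
import zlib

def pack_trace_data(data):
    # JSON-encode, compress, then base64-encode, mirroring the steps above.
    json_data = json.dumps(data, ensure_ascii=True)
    zlib_data = zlib.compress(json_data.encode('latin-1'))
    pack_data = base64.standard_b64encode(zlib_data)
    if sys.version_info[0] >= 3:
        # standard_b64encode() returns bytes on Python 3; decode it to text.
        pack_data = pack_data.decode('latin-1')
    return pack_data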
Example #5
    def connection(self, database, args, kwargs):
        """

        :param SQLDatabase database:
        :param args:
        :param kwargs:
        :return:
        """
        key = (database.client, args, kwargs)

        connection = None

        settings = global_settings()

        for i, item in enumerate(self.connections):
            if item[0] == key:
                connection = item[1]

                # Move it to the back of the list so we always know
                # which connection was most recently used.

                item = self.connections.pop(i)
                self.connections.append(item)

                break

        if connection is None:
            # If we are at the maximum number of connections to
            # keep hold of, pop the one which has gone the longest
            # without being used.

            if len(self.connections) == self.maximum:
                connection = self.connections.pop(0)[1]

                internal_metric(
                    'Supportability/Python/DatabaseUtils/Counts/'
                    'drop_database_connection', 1)

                if settings.debug.log_explain_plan_queries:
                    _logger.debug(
                        'Drop database connection for %r as '
                        'reached maximum of %r.', connection.database.client,
                        self.maximum)

                connection.cleanup()

            connection = SQLConnection(database,
                                       database.connect(*args, **kwargs))

            self.connections.append((key, connection))

            if settings.debug.log_explain_plan_queries:
                _logger.debug('Created database connection for %r.',
                              database.client)

        return connection
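
The cache above behaves like a small least-recently-used pool: a lookup hit
moves the entry to the back of the list, and when the pool is full the entry
at the front, the least recently used one, is evicted. That policy in
isolation (names are illustrative):

class LRUPool:
    def __init__(self, maximum, factory):
        self.maximum = maximum
        self.factory = factory   # called with the key to create a new value
        self.entries = []        # list of (key, value), oldest first

    def get(self, key):
        for i, (existing_key, value) in enumerate(self.entries):
            if existing_key == key:
                # Move the hit to the back so the front stays least recently used.
                self.entries.append(self.entries.pop(i))
                return value
        if len(self.entries) == self.maximum:
            # Evict the least recently used entry to make room.
            self.entries.pop(0)
        value = self.factory(key)
        self.entries.append((key, value))
        return value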
Example #6
    def normalize(self, path, data):
        if not data:
            return None

        stripped = data.strip()

        if self.valid_length(stripped) and self.valid_chars(stripped):
            result = stripped
        else:
            internal_metric("Supportability/utilization/aws/error", 1)
            _logger.warning('Fetched invalid AWS data for "%r": %r', path, data)
            result = None

        return result
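
The valid_length() and valid_chars() helpers are not shown in this snippet. A
plausible sketch, assuming the utilization rules amount to a bounded length
and a restricted character set (the agent's actual limits and pattern may
differ):

import re

_VALID_CHARS_RE = re.compile(r'^[\w ./-]+$')

def valid_length(data, maximum=255):
    # Hypothetical limit; the real agent's maximum may differ.
    return 0 < len(data) <= maximum

def valid_chars(data):
    # Hypothetical character whitelist; the real agent's pattern may differ.
    return bool(_VALID_CHARS_RE.match(data))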
Example #7
    def connection(self, database, args, kwargs):
        key = (database.client, args, kwargs)

        connection = None

        settings = global_settings()

        for i, item in enumerate(self.connections):
            if item[0] == key:
                connection = item[1]

                # Move it to the back of the list so we always know
                # which connection was most recently used.

                item = self.connections.pop(i)
                self.connections.append(item)

                break

        if connection is None:
            # If we are at the maximum number of connections to
            # keep hold of, pop the one which has gone the longest
            # without being used.

            if len(self.connections) == self.maximum:
                connection = self.connections.pop(0)[1]

                internal_metric('Supportability/DatabaseUtils/Counts/'
                                'drop_database_connection', 1)

                if settings.debug.log_explain_plan_queries:
                    _logger.debug('Drop database connection for %r as '
                            'reached maximum of %r.',
                            connection.database.client, self.maximum)

                connection.cleanup()

            connection = SQLConnection(database,
                    database.connect(*args, **kwargs))

            self.connections.append((key, connection))

            internal_metric('Supportability/DatabaseUtils/Counts/'
                            'create_database_connection', 1)

            if settings.debug.log_explain_plan_queries:
                _logger.debug('Created database connection for %r.',
                        database.client)

        return connection
Example #8
    def normalize(self, path, data):
        if not data:
            return None

        stripped = data.strip()

        if self.valid_length(stripped) and self.valid_chars(stripped):
            result = stripped
        else:
            internal_metric('Supportability/utilization/aws/error', 1)
            _logger.warning('Fetched invalid AWS data for "%r": %r', path,
                            data)
            result = None

        return result
Example #9
    def _supportability_request(params, payload, body, compression_time):
        # *********
        # Used only for supportability metrics. Do not use to drive business
        # logic!
        agent_method = params and params.get("method")
        # *********

        if agent_method and body:
            # Compression was applied
            if compression_time is not None:
                internal_metric(
                    "Supportability/Python/Collector/ZLIB/Bytes/%s" %
                    agent_method,
                    len(payload),
                )
                internal_metric(
                    "Supportability/Python/Collector/ZLIB/Compress/%s" %
                    agent_method,
                    compression_time,
                )

            internal_metric(
                "Supportability/Python/Collector/Output/Bytes/%s" %
                agent_method,
                len(body),
            )
Example #10
def _process_cgroup_info(cgroup_info):
    """Parses the Docker container id from cgroup info.

    Arguments:
      cgroup_info: An iterable where each item is a line of a cgroup file.

    Returns:
      Docker container id or None if it can't be determined.

    """

    cgroup_ids = _parse_cgroup_ids(cgroup_info)
    cpu_cgroup = cgroup_ids.get('cpu', '')

    native_no_sysd_p = '^/docker/(?P<native_no_sysd>[0-9a-f]+)$'
    native_sysd_p = '^/system.slice/docker-(?P<native_sysd>[0-9a-f]+).scope$'
    lxc_p = '^/lxc/(?P<lxc>[0-9a-f]+)$'
    docker_id_p = '|'.join([native_no_sysd_p, native_sysd_p, lxc_p])

    match = re.match(docker_id_p, cpu_cgroup)
    if match:
        container_id = next((m for m in match.groups() if m is not None))
    elif cpu_cgroup == '/':
        container_id = None
    else:
        _logger.debug("Ignoring unrecognized cgroup ID format: '%s'" %
                      (cpu_cgroup))
        container_id = None

    if (container_id and not _validate_docker_container_id(container_id)):
        # Log the offending id before discarding it.
        _logger.warning("Docker cgroup ID does not validate: '%s'" %
                        container_id)
        container_id = None
        # As per spec
        internal_metric('Supportability/utilization/docker/error', 1)

    return container_id
def _process_cgroup_info(cgroup_info):
    """Parses the Docker container id from cgroup info.

    Arguments:
      cgroup_info: An iterable where each item is a line of a cgroup file.

    Returns:
      Docker container id or None if it can't be determined.

    """

    cgroup_ids = _parse_cgroup_ids(cgroup_info)
    cpu_cgroup = cgroup_ids.get('cpu', '')

    native_no_sysd_p = '^/docker/(?P<native_no_sysd>[0-9a-f]+)$'
    native_sysd_p = '^/system.slice/docker-(?P<native_sysd>[0-9a-f]+).scope$'
    lxc_p = '^/lxc/(?P<lxc>[0-9a-f]+)$'
    docker_id_p = '|'.join([native_no_sysd_p, native_sysd_p, lxc_p])

    match = re.match(docker_id_p, cpu_cgroup)
    if match:
        container_id = next((m for m in match.groups() if m is not None))
    elif cpu_cgroup == '/':
        container_id = None
    else:
        _logger.debug("Ignoring unrecognized cgroup ID format: '%s'" %
                (cpu_cgroup))
        container_id = None

    if (container_id and not _validate_docker_container_id(container_id)):
        # Log the offending id before discarding it.
        _logger.warning("Docker cgroup ID does not validate: '%s'" %
                container_id)
        container_id = None
        internal_metric('Supportability/utilization/docker/error', 1)

    return container_id
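
For reference, the alternation built above covers the three cgroup path
layouts named by the patterns. A quick standalone check against a synthetic
native-Docker path (the 64-character hex id below is made up):

import re

native_no_sysd_p = '^/docker/(?P<native_no_sysd>[0-9a-f]+)$'
native_sysd_p = '^/system.slice/docker-(?P<native_sysd>[0-9a-f]+).scope$'
lxc_p = '^/lxc/(?P<lxc>[0-9a-f]+)$'
docker_id_p = '|'.join([native_no_sysd_p, native_sysd_p, lxc_p])

cpu_cgroup = '/docker/' + 'f' * 64    # synthetic example value

match = re.match(docker_id_p, cpu_cgroup)
if match:
    # Only the named group for the matching layout is populated.
    container_id = next(m for m in match.groups() if m is not None)
    print(container_id)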
Example #12
    def transaction_trace_data(self):
        """Returns a list of slow transaction data collected
        during the reporting period.

        """
        if not self.__settings:
            return []

        if (not self.__slow_transaction) and (not self.__saved_transactions):
            return []

        trace_data = []
        maximum = self.__settings.agent_limits.transaction_traces_nodes
        traces = list(self.__saved_transactions)

        if (self.__slow_transaction is not None and
                self.__slow_transaction not in traces):
            traces.append(self.__slow_transaction)

        for trace in traces:
            transaction_trace = trace.transaction_trace(self, maximum)

            internal_metric('Supportability/StatsEngine/Counts/'
                            'transaction_sample_data',
                            trace.trace_node_count)

            data = [transaction_trace,
                    trace.string_table.values()]

            if self.__settings.debug.log_transaction_trace_payload:
                _logger.debug('Encoding slow transaction data where '
                              'payload=%r.', data)

            with InternalTrace('Supportability/StatsEngine/JSON/Encode/'
                               'transaction_sample_data'):

                json_data = simplejson.dumps(data, ensure_ascii=True,
                        encoding='Latin-1', namedtuple_as_object=False,
                        default=lambda o: list(iter(o)))

            internal_metric('Supportability/StatsEngine/ZLIB/Bytes/'
                            'transaction_sample_data', len(json_data))

            with InternalTrace('Supportability/StatsEngine/ZLIB/Compress/'
                               'transaction_sample_data'):
                zlib_data = zlib.compress(json_data)

            with InternalTrace('Supportability/StatsEngine/BASE64/Encode/'
                               'transaction_sample_data'):
                pack_data = base64.standard_b64encode(zlib_data)

            root = transaction_trace.root
            force_persist = trace.guid is not None

            trace_data.append([root.start_time,
                    root.end_time - root.start_time,
                    trace.path,
                    trace.request_uri,
                    pack_data,
                    trace.guid,
                    None,
                    force_persist])

        return trace_data
Example #13
def send_request(session,
                 url,
                 method,
                 license_key,
                 agent_run_id=None,
                 payload=()):
    """Constructs and sends a request to the data collector."""

    params = {}
    headers = {}
    config = {}

    settings = global_settings()

    start = time.time()

    # Validate that the license key was actually set and if not replace
    # it with a string which makes it more obvious it was not set.

    if not license_key:
        license_key = 'NO LICENSE KEY WAS SET IN AGENT CONFIGURATION'

    # The agent formats requests and is able to handle responses for
    # protocol version 12.

    params['method'] = method
    params['license_key'] = license_key
    params['protocol_version'] = '12'
    params['marshal_format'] = 'json'

    if agent_run_id:
        params['run_id'] = str(agent_run_id)

    headers['User-Agent'] = USER_AGENT
    headers['Content-Encoding'] = 'identity'

    # Set up definitions for proxy server in case that has been set.

    proxies = proxy_server()

    # At this time we use JSON content encoding for the data being sent.
    # If an error does occur when encoding the JSON, then it isn't
    # likely to work later on in a subsequent request with the same
    # data, even if aggregated with other data, so we need to log the
    # details and then flag that the data should be thrown away. We
    # don't mind being noisy in the log in this situation as it would
    # indicate a problem with the implementation of the agent.

    try:
        with InternalTrace('Supportability/Collector/JSON/Encode/%s' % method):
            data = json_encode(payload)

    except Exception:
        _logger.exception(
            'Error encoding data for JSON payload for '
            'method %r with payload of %r. Please report this problem '
            'to New Relic support.', method, payload)

        raise DiscardDataForRequest(str(sys.exc_info()[1]))

    # Log details of call and/or payload for debugging. Use the JSON
    # encoded value so we know that what is encoded is correct.

    if settings.debug.log_data_collector_payloads:
        _logger.debug(
            'Calling data collector with url=%r, method=%r and '
            'payload=%r.', url, method, data)
    elif settings.debug.log_data_collector_calls:
        _logger.debug('Calling data collector with url=%r and method=%r.', url,
                      method)

    # Compress the serialized JSON being sent as content if over 64KiB
    # in size. If less than 2MB in size compress for speed. If over
    # 2MB then compress for smallest size. This parallels what the Ruby
    # agent does.

    if len(data) > 64 * 1024:
        headers['Content-Encoding'] = 'deflate'
        level = (len(data) < 2000000) and 1 or 9

        internal_metric('Supportability/Collector/ZLIB/Bytes/%s' % method,
                        len(data))

        with InternalTrace('Supportability/Collector/ZLIB/Compress/'
                           '%s' % method):
            data = zlib.compress(six.b(data), level)

    # If there is no requests session object provided for making
    # requests, create one now. We want to close this as soon as we
    # are done with it.

    auto_close_session = False

    if not session:
        session = requests.session()
        auto_close_session = True

    # The 'requests' library can raise a number of exceptions derived
    # from 'RequestException' before we even manage to get a connection
    # to the data collector.
    #
    # The data collector can then generate a number of different types of
    # HTTP errors for requests. These are:
    #
    # 400 Bad Request - For an incorrect method type or incorrectly
    # constructed parameters. We should not get this and if we do it would
    # likely indicate a problem with the implementation of the agent.
    #
    # 413 Request Entity Too Large - Where the request content was too
    # large. The limits on the number of nodes in slow transaction traces
    # should in general prevent this, but not everything has size limits
    # and so rogue data could still blow things out. The same data is not
    # going to work later on in a subsequent request, even if aggregated
    # with other data, so we need to log the details and then flag that
    # the data should be thrown away.
    #
    # 415 Unsupported Media Type - This occurs when the JSON which was
    # sent can't be decoded by the data collector. If this is a true
    # problem with the JSON formatting, then sending again, even if
    # aggregated with other data, may not work, so we need to log the
    # details and then flag that the data should be thrown away.
    #
    # 503 Service Unavailable - This occurs when the data collector, or
    # the core application, is being restarted and is not in a state to
    # accept requests. It should be a transient issue, so we should be
    # able to retain the data and try again.

    internal_metric('Supportability/Collector/Output/Bytes/%s' % method,
                    len(data))

    try:
        # The timeout value in the requests module applies only to
        # the initial connection and not to how long it takes to get
        # back a response.

        timeout = settings.agent_limits.data_collector_timeout

        r = session.post(url,
                         params=params,
                         headers=headers,
                         proxies=proxies,
                         timeout=timeout,
                         data=data)

        # Read the content now so we can force close the socket
        # connection if this is a transient session as quickly
        # as possible.

        content = r.content

    except requests.RequestException:
        if not settings.proxy_host or not settings.proxy_port:
            _logger.warning(
                'Data collector is not contactable. This can be '
                'because of a network issue or because of the data '
                'collector being restarted. In the event that contact '
                'cannot be made after a period of time then please '
                'report this problem to New Relic support for further '
                'investigation. The error raised was %r.',
                sys.exc_info()[1])
        else:
            _logger.warning(
                'Data collector is not contactable via the proxy '
                'host %r on port %r with proxy user of %r. This can be '
                'because of a network issue or because of the data '
                'collector being restarted. In the event that contact '
                'cannot be made after a period of time then please '
                'report this problem to New Relic support for further '
                'investigation. The error raised was %r.', settings.proxy_host,
                settings.proxy_port, settings.proxy_user,
                sys.exc_info()[1])

        raise RetryDataForRequest(str(sys.exc_info()[1]))

    finally:
        if auto_close_session:
            session.close()
            session = None

    if r.status_code != 200:
        _logger.debug(
            'Received a non 200 HTTP response from the data '
            'collector where url=%r, method=%r, license_key=%r, '
            'agent_run_id=%r, params=%r, headers=%r, status_code=%r '
            'and content=%r.', url, method, license_key, agent_run_id, params,
            headers, r.status_code, content)

    if r.status_code == 400:
        _logger.error(
            'Data collector is indicating that a bad '
            'request has been submitted for url %r, headers of %r, '
            'params of %r and payload of %r. Please report this '
            'problem to New Relic support.', url, headers, params, payload)

        raise DiscardDataForRequest()

    elif r.status_code == 413:
        _logger.warning(
            'Data collector is indicating that a request for '
            'method %r was received where the request content size '
            'was over the maximum allowed size limit. The length of '
            'the request content was %d. If this keeps occurring on a '
            'regular basis, please report this problem to New Relic '
            'support for further investigation.', method, len(data))

        raise DiscardDataForRequest()

    elif r.status_code == 415:
        _logger.warning(
            'Data collector is indicating that it was sent '
            'malformed JSON data for method %r. If this keeps occurring '
            'on a regular basis, please report this problem to New '
            'Relic support for further investigation.', method)

        if settings.debug.log_malformed_json_data:
            if headers['Content-Encoding'] == 'deflate':
                data = zlib.decompress(data)

            _logger.info(
                'JSON data which was rejected by the data '
                'collector was %r.', data)

        raise DiscardDataForRequest(content)

    elif r.status_code == 503:
        _logger.warning(
            'Data collector is unavailable. This can be a '
            'transient issue because of the data collector or our '
            'core application being restarted. If the issue persists '
            'it can also be indicative of a problem with our servers. '
            'In the event that availability of our servers is not '
            'restored after a period of time then please report this '
            'problem to New Relic support for further investigation.')

        raise ServerIsUnavailable()

    elif r.status_code != 200:
        if not settings.proxy_host or not settings.proxy_port:
            _logger.warning(
                'An unexpected HTTP response was received from '
                'the data collector of %r for method %r. The payload for '
                'the request was %r. If this issue persists then please '
                'report this problem to New Relic support for further '
                'investigation.', r.status_code, method, payload)
        else:
            _logger.warning(
                'An unexpected HTTP response was received from '
                'the data collector of %r for method %r while connecting '
                'via proxy host %r on port %r with proxy user of %r. '
                'The payload for the request was %r. If this issue '
                'persists then please report this problem to New Relic '
                'support for further investigation.', r.status_code, method,
                settings.proxy_host, settings.proxy_port, settings.proxy_user,
                payload)

        raise DiscardDataForRequest()

    # Log details of response payload for debugging. Use the JSON
    # encoded value so we know what the original encoded value was.

    duration = time.time() - start

    if settings.debug.log_data_collector_payloads:
        _logger.debug(
            'Valid response from data collector after %.2f '
            'seconds with content=%r.', duration, content)
    elif settings.debug.log_data_collector_calls:
        _logger.debug(
            'Valid response from data collector after %.2f '
            'seconds.', duration)

    # If we got this far we should have a legitimate response from the
    # data collector. The response is JSON, so we need to decode it.

    internal_metric('Supportability/Collector/Input/Bytes/%s' % method,
                    len(content))

    try:
        with InternalTrace('Supportability/Collector/JSON/Decode/%s' % method):
            if six.PY3:
                content = content.decode('UTF-8')

            result = json_decode(content)

    except Exception:
        _logger.exception(
            'Error decoding data for JSON payload for '
            'method %r with payload of %r. Please report this problem '
            'to New Relic support.', method, content)

        if settings.debug.log_malformed_json_data:
            _logger.info(
                'JSON data received from data collector which '
                'could not be decoded was %r.', content)

        raise DiscardDataForRequest(str(sys.exc_info()[1]))

    # The decoded JSON can be either for a successful response or an
    # error. A successful response has a 'return_value' element and an
    # error an 'exception' element.

    if 'return_value' in result:
        return result['return_value']

    error_type = result['exception']['error_type']
    message = result['exception']['message']

    # Now need to check for server side exceptions. The following
    # exceptions can occur for abnormal events.

    _logger.debug(
        'Received an exception from the data collector where '
        'url=%r, method=%r, license_key=%r, agent_run_id=%r, params=%r, '
        'headers=%r, error_type=%r and message=%r', url, method, license_key,
        agent_run_id, params, headers, error_type, message)

    if error_type == 'NewRelic::Agent::LicenseException':
        _logger.error(
            'Data collector is indicating that an incorrect '
            'license key has been supplied by the agent. The value '
            'which was used by the agent is %r. Please correct any '
            'problem with the license key or report this problem to '
            'New Relic support.', license_key)

        raise DiscardDataForRequest(message)

    elif error_type == 'NewRelic::Agent::PostTooBigException':
        _logger.warning(
            'Core application is indicating that a request for '
            'method %r was received where the request content size '
            'was over the maximum allowed size limit. The length of '
            'the request content was %d. If this keeps occurring on a '
            'regular basis, please report this problem to New Relic '
            'support for further investigation.', method, len(data))

        raise DiscardDataForRequest(message)

    # Server side exceptions are also used to instruct the agent to
    # perform certain actions, such as restarting when the server side
    # configuration has changed for this application or when the agent
    # is being disabled remotely for some reason.

    if error_type == 'NewRelic::Agent::ForceRestartException':
        _logger.info(
            'An automatic internal agent restart has been '
            'requested by the data collector for the application '
            'where the agent run was %r. The reason given for the '
            'forced restart is %r.', agent_run_id, message)

        raise ForceAgentRestart(message)

    elif error_type == 'NewRelic::Agent::ForceDisconnectException':
        _logger.critical(
            'Disconnection of the agent has been requested by '
            'the data collector for the application where the '
            'agent run was %r. The reason given for the forced '
            'disconnection is %r. Please contact New Relic support '
            'for further information.', agent_run_id, message)

        raise ForceAgentDisconnect(message)

    # We received an unexpected server side error that we don't know
    # what to do with.

    _logger.warning(
        'An unexpected server error was received from the '
        'data collector for method %r with payload of %r. The error '
        'was of type %r with message %r. If this issue persists '
        'then please report this problem to New Relic support for '
        'further investigation.', method, payload, error_type, message)

    raise DiscardDataForRequest(message)
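
The expression '(len(data) < 2000000) and 1 or 9' above is the old and/or
idiom for a conditional value, equivalent to '1 if len(data) < 2000000 else 9'.
The whole compression policy, deflate only payloads over 64 KiB while
favouring speed below roughly 2 MB and size above that, can be sketched on its
own as:

import zlib

def maybe_compress(data, threshold=64 * 1024):
    """Return (body, content_encoding); 'data' is the serialized payload bytes."""
    if len(data) <= threshold:
        return data, 'identity'
    # Level 1 favours speed for mid-sized payloads, level 9 favours size.
    level = 1 if len(data) < 2000000 else 9
    return zlib.compress(data, level), 'deflate'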
Example #14
def send_request(session, url, method, license_key, agent_run_id=None,
            payload=()):
    """Constructs and sends a request to the data collector."""

    params = {}
    headers = {}
    config = {}

    settings = global_settings()

    start = time.time()

    # Validate that the license key was actually set and if not replace
    # it with a string which makes it more obvious it was not set.

    if not license_key:
        license_key = 'NO LICENSE KEY WAS SET IN AGENT CONFIGURATION'

    # The agent formats requests and is able to handle responses for
    # protocol version 14.

    params['method'] = method
    params['license_key'] = license_key
    params['protocol_version'] = '14'
    params['marshal_format'] = 'json'

    if agent_run_id:
        params['run_id'] = str(agent_run_id)

    headers['User-Agent'] = USER_AGENT
    headers['Content-Encoding'] = 'identity'

    # Set up definitions for proxy server in case that has been set.

    proxies = proxy_server()

    # At this time we use JSON content encoding for the data being sent.
    # If an error does occur when encoding the JSON, then it isn't
    # likely to work later on in a subsequent request with the same
    # data, even if aggregated with other data, so we need to log the
    # details and then flag that the data should be thrown away. We
    # don't mind being noisy in the log in this situation as it would
    # indicate a problem with the implementation of the agent.

    try:
        with InternalTrace('Supportability/Collector/JSON/Encode/%s' % method):
            data = json_encode(payload)

    except Exception:
        _logger.exception('Error encoding data for JSON payload for '
                'method %r with payload of %r. Please report this problem '
                'to New Relic support.', method, payload)

        raise DiscardDataForRequest(str(sys.exc_info()[1]))

    # Log details of call and/or payload for debugging. Use the JSON
    # encoded value so we know that what is encoded is correct.

    if settings.debug.log_data_collector_payloads:
        _logger.debug('Calling data collector with url=%r, method=%r and '
                'payload=%r.', url, method, data)
    elif settings.debug.log_data_collector_calls:
        _logger.debug('Calling data collector with url=%r and method=%r.',
                url, method)

    # Compress the serialized JSON being sent as content if over 64KiB
    # in size. If less than 2MB in size compress for speed. If over
    # 2MB then compress for smallest size. This parallels what the Ruby
    # agent does.

    if len(data) > 64*1024:
        headers['Content-Encoding'] = 'deflate'
        level = (len(data) < 2000000) and 1 or 9

        internal_metric('Supportability/Collector/ZLIB/Bytes/%s' % method,
                len(data))

        with InternalTrace('Supportability/Collector/ZLIB/Compress/'
                '%s' % method):
            data = zlib.compress(six.b(data), level)

    # If there is no requests session object provided for making
    # requests, create one now. We want to close this as soon as we
    # are done with it.

    auto_close_session = False

    if not session:
        session = requests.session()
        auto_close_session = True

    # The 'requests' library can raise a number of exceptions derived
    # from 'RequestException' before we even manage to get a connection
    # to the data collector.
    #
    # The data collector can then generate a number of different types of
    # HTTP errors for requests. These are:
    #
    # 400 Bad Request - For an incorrect method type or incorrectly
    # constructed parameters. We should not get this and if we do it would
    # likely indicate a problem with the implementation of the agent.
    #
    # 413 Request Entity Too Large - Where the request content was too
    # large. The limits on the number of nodes in slow transaction traces
    # should in general prevent this, but not everything has size limits
    # and so rogue data could still blow things out. The same data is not
    # going to work later on in a subsequent request, even if aggregated
    # with other data, so we need to log the details and then flag that
    # the data should be thrown away.
    #
    # 415 Unsupported Media Type - This occurs when the JSON which was
    # sent can't be decoded by the data collector. If this is a true
    # problem with the JSON formatting, then sending again, even if
    # aggregated with other data, may not work, so we need to log the
    # details and then flag that the data should be thrown away.
    #
    # 503 Service Unavailable - This occurs when the data collector, or
    # the core application, is being restarted and is not in a state to
    # accept requests. It should be a transient issue, so we should be
    # able to retain the data and try again.

    internal_metric('Supportability/Collector/Output/Bytes/%s' % method,
            len(data))

    # If audit logging is enabled, log the request details.

    log_id = _log_request(url, params, headers, data)

    try:
        # The timeout value in the requests module applies only to
        # the initial connection and not to how long it takes to get
        # back a response.

        cert_loc = certs.where()

        if settings.debug.disable_certificate_validation:
            cert_loc = False

        timeout = settings.agent_limits.data_collector_timeout

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            r = session.post(url, params=params, headers=headers,
                    proxies=proxies, timeout=timeout, data=data,
                    verify=cert_loc)

        # Read the content now so we can force close the socket
        # connection if this is a transient session as quickly
        # as possible.

        content = r.content

    except requests.RequestException:
        if not settings.proxy_host or not settings.proxy_port:
            _logger.warning('Data collector is not contactable. This can be '
                    'because of a network issue or because of the data '
                    'collector being restarted. In the event that contact '
                    'cannot be made after a period of time then please '
                    'report this problem to New Relic support for further '
                    'investigation. The error raised was %r.',
                    sys.exc_info()[1])
        else:
            _logger.warning('Data collector is not contactable via the proxy '
                    'host %r on port %r with proxy user of %r. This can be '
                    'because of a network issue or because of the data '
                    'collector being restarted. In the event that contact '
                    'cannot be made after a period of time then please '
                    'report this problem to New Relic support for further '
                    'investigation. The error raised was %r.',
                    settings.proxy_host, settings.proxy_port,
                    settings.proxy_user, sys.exc_info()[1])

        raise RetryDataForRequest(str(sys.exc_info()[1]))

    finally:
        if auto_close_session:
            session.close()
            session = None

    if r.status_code != 200:
        _logger.debug('Received a non 200 HTTP response from the data '
                'collector where url=%r, method=%r, license_key=%r, '
                'agent_run_id=%r, params=%r, headers=%r, status_code=%r '
                'and content=%r.', url, method, license_key, agent_run_id,
                params, headers, r.status_code, content)

    if r.status_code == 400:
        _logger.error('Data collector is indicating that a bad '
                'request has been submitted for url %r, headers of %r, '
                'params of %r and payload of %r. Please report this '
                'problem to New Relic support.', url, headers, params,
                payload)

        raise DiscardDataForRequest()

    elif r.status_code == 413:
        _logger.warning('Data collector is indicating that a request for '
                'method %r was received where the request content size '
                'was over the maximum allowed size limit. The length of '
                'the request content was %d. If this keeps occurring on a '
                'regular basis, please report this problem to New Relic '
                'support for further investigation.', method, len(data))

        raise DiscardDataForRequest()

    elif r.status_code == 415:
        _logger.warning('Data collector is indicating that it was sent '
                'malformed JSON data for method %r. If this keeps occurring '
                'on a regular basis, please report this problem to New '
                'Relic support for further investigation.', method)

        if settings.debug.log_malformed_json_data:
            if headers['Content-Encoding'] == 'deflate':
                data = zlib.decompress(data)

            _logger.info('JSON data which was rejected by the data '
                    'collector was %r.', data)

        raise DiscardDataForRequest(content)

    elif r.status_code == 503:
        _logger.warning('Data collector is unavailable. This can be a '
                'transient issue because of the data collector or our '
                'core application being restarted. If the issue persists '
                'it can also be indicative of a problem with our servers. '
                'In the event that availability of our servers is not '
                'restored after a period of time then please report this '
                'problem to New Relic support for further investigation.')

        raise ServerIsUnavailable()

    elif r.status_code != 200:
        if not settings.proxy_host or not settings.proxy_port:
            _logger.warning('An unexpected HTTP response was received from '
                    'the data collector of %r for method %r. The payload for '
                    'the request was %r. If this issue persists then please '
                    'report this problem to New Relic support for further '
                    'investigation.', r.status_code, method, payload)
        else:
            _logger.warning('An unexpected HTTP response was received from '
                    'the data collector of %r for method %r while connecting '
                    'via proxy host %r on port %r with proxy user of %r. '
                    'The payload for the request was %r. If this issue '
                    'persists then please report this problem to New Relic '
                    'support for further investigation.', r.status_code,
                    method, settings.proxy_host, settings.proxy_port,
                    settings.proxy_user, payload)

        raise DiscardDataForRequest()

    # Log details of response payload for debugging. Use the JSON
    # encoded value so we know what the original encoded value was.

    duration = time.time() - start

    if settings.debug.log_data_collector_payloads:
        _logger.debug('Valid response from data collector after %.2f '
                'seconds with content=%r.', duration, content)
    elif settings.debug.log_data_collector_calls:
        _logger.debug('Valid response from data collector after %.2f '
                'seconds.', duration)

    # If we got this far we should have a legitimate response from the
    # data collector. The response is JSON, so we need to decode it.

    internal_metric('Supportability/Collector/Input/Bytes/%s' % method,
            len(content))

    try:
        with InternalTrace('Supportability/Collector/JSON/Decode/%s' % method):
            if six.PY3:
                content = content.decode('UTF-8')

            result = json_decode(content)

    except Exception:
        _logger.exception('Error decoding data for JSON payload for '
                'method %r with payload of %r. Please report this problem '
                'to New Relic support.', method, content)

        if settings.debug.log_malformed_json_data:
            _logger.info('JSON data received from data collector which '
                    'could not be decoded was %r.', content)

        raise DiscardDataForRequest(str(sys.exc_info()[1]))

    # The decoded JSON can be either for a successful response or an
    # error. A successful response has a 'return_value' element and an
    # error an 'exception' element.

    if log_id is not None:
        _log_response(log_id, result)

    if 'return_value' in result:
        return result['return_value']

    error_type = result['exception']['error_type']
    message = result['exception']['message']

    # Now need to check for server side exceptions. The following
    # exceptions can occur for abnormal events.

    _logger.debug('Received an exception from the data collector where '
            'url=%r, method=%r, license_key=%r, agent_run_id=%r, params=%r, '
            'headers=%r, error_type=%r and message=%r', url, method,
            license_key, agent_run_id, params, headers, error_type,
            message)

    if error_type == 'NewRelic::Agent::LicenseException':
        _logger.error('Data collector is indicating that an incorrect '
                'license key has been supplied by the agent. The value '
                'which was used by the agent is %r. Please correct any '
                'problem with the license key or report this problem to '
                'New Relic support.', license_key)

        raise DiscardDataForRequest(message)

    elif error_type == 'NewRelic::Agent::PostTooBigException':
        _logger.warning('Core application is indicating that a request for '
                'method %r was received where the request content size '
                'was over the maximum allowed size limit. The length of '
                'the request content was %d. If this keeps occurring on a '
                'regular basis, please report this problem to New Relic '
                'support for further investigation.', method, len(data))

        raise DiscardDataForRequest(message)

    # Server side exceptions are also used to instruct the agent to
    # perform certain actions, such as restarting when the server side
    # configuration has changed for this application or when the agent
    # is being disabled remotely for some reason.

    if error_type == 'NewRelic::Agent::ForceRestartException':
        _logger.info('An automatic internal agent restart has been '
                'requested by the data collector for the application '
                'where the agent run was %r. The reason given for the '
                'forced restart is %r.', agent_run_id, message)

        raise ForceAgentRestart(message)

    elif error_type == 'NewRelic::Agent::ForceDisconnectException':
        _logger.critical('Disconnection of the agent has been requested by '
                'the data collector for the application where the '
                'agent run was %r. The reason given for the forced '
                'disconnection is %r. Please contact New Relic support '
                'for further information.', agent_run_id, message)

        raise ForceAgentDisconnect(message)

    # We received an unexpected server side error that we don't know
    # what to do with.

    _logger.warning('An unexpected server error was received from the '
            'data collector for method %r with payload of %r. The error '
            'was of type %r with message %r. If this issue persists '
            'then please report this problem to New Relic support for '
            'further investigation.', method, payload, error_type, message)

    raise DiscardDataForRequest(message)
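
A stripped-down sketch of how the decoded collector reply is interpreted
above: 'return_value' carries the result on success, while 'exception'
carries an error_type and message that the agent maps to restart, disconnect
or discard behaviour (the exception class here is a placeholder for the
agent's own):

class DiscardDataForRequest(Exception):
    pass

def interpret_collector_response(result):
    if 'return_value' in result:
        return result['return_value']

    error_type = result['exception']['error_type']
    message = result['exception']['message']

    # The real agent raises different exceptions for specific error_type
    # values; anything unexpected ends with the data being discarded.
    raise DiscardDataForRequest('%s: %s' % (error_type, message))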
Example #15
    def __init__(
        self,
        host,
        port=443,
        proxy_scheme=None,
        proxy_host=None,
        proxy_port=None,
        proxy_user=None,
        proxy_pass=None,
        timeout=None,
        ca_bundle_path=None,
        disable_certificate_validation=False,
        compression_threshold=64 * 1024,
        compression_level=None,
        compression_method="gzip",
        max_payload_size_in_bytes=1000000,
        audit_log_fp=None,
    ):
        self._host = host
        port = self._port = port
        self._compression_threshold = compression_threshold
        self._compression_level = compression_level
        self._compression_method = compression_method
        self._max_payload_size_in_bytes = max_payload_size_in_bytes
        self._audit_log_fp = audit_log_fp

        self._prefix = ""

        self._headers = dict(self.BASE_HEADERS)
        self._connection_kwargs = connection_kwargs = {
            "timeout": timeout,
        }
        self._urlopen_kwargs = urlopen_kwargs = {}

        if self.CONNECTION_CLS.scheme == "https":
            if not ca_bundle_path:
                verify_path = get_default_verify_paths()

                # If there is no resolved cafile, assume the bundled certs are
                # required and report this condition as a supportability metric.
                if not verify_path.cafile:
                    ca_bundle_path = certs.where()
                    internal_metric(
                        "Supportability/Python/Certificate/BundleRequired", 1
                    )

            if ca_bundle_path:
                if os.path.isdir(ca_bundle_path):
                    connection_kwargs["ca_cert_dir"] = ca_bundle_path
                else:
                    connection_kwargs["ca_certs"] = ca_bundle_path

            if disable_certificate_validation:
                connection_kwargs["cert_reqs"] = "NONE"

        proxy = self._parse_proxy(
            proxy_scheme, proxy_host, proxy_port, proxy_user, proxy_pass,
        )
        proxy_headers = (
            proxy and proxy.auth and urllib3.make_headers(proxy_basic_auth=proxy.auth)
        )

        if proxy:
            if self.CONNECTION_CLS.scheme == "https" and proxy.scheme != "https":
                connection_kwargs["_proxy"] = proxy
                connection_kwargs["_proxy_headers"] = proxy_headers
            else:
                self._host = proxy.host
                self._port = proxy.port or 443
                self._prefix = self.PREFIX_SCHEME + host + ":" + str(port)
                urlopen_kwargs["assert_same_host"] = False
                if proxy_headers:
                    self._headers.update(proxy_headers)

        # Logging
        self._proxy = proxy

        self._connection_attr = None
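
The HTTPS branch of the constructor above resolves certificate options roughly
as follows: fall back to a bundled CA file only when the platform exposes no
default cafile, pass a directory as ca_cert_dir and a file as ca_certs, and
disable verification only when explicitly requested. A standalone sketch of
that resolution, using certifi as a stand-in for the agent's bundled certs
module:

import os
import ssl

import certifi

def resolve_tls_kwargs(ca_bundle_path=None, disable_certificate_validation=False):
    kwargs = {}

    if not ca_bundle_path and not ssl.get_default_verify_paths().cafile:
        # No system CA file was resolved; fall back to bundled certificates.
        ca_bundle_path = certifi.where()

    if ca_bundle_path:
        key = 'ca_cert_dir' if os.path.isdir(ca_bundle_path) else 'ca_certs'
        kwargs[key] = ca_bundle_path

    if disable_certificate_validation:
        kwargs['cert_reqs'] = 'NONE'

    return kwargs
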
    def transaction_trace_data(self):
        """Returns a list of slow transaction data collected
        during the reporting period.

        """
        if not self.__settings:
            return []

        # Create a set 'traces' that is the union of the slow transaction,
        # browser_transactions and xray_transactions. This ensures we don't
        # send duplicates of a transaction.

        traces = set()
        if self.__slow_transaction:
            traces.add(self.__slow_transaction)
        traces.update(self.__browser_transactions)
        traces.update(self.__xray_transactions)

        # Return an empty list if no transactions were captured.

        if not traces:
            return []

        trace_data = []
        maximum = self.__settings.agent_limits.transaction_traces_nodes

        for trace in traces:
            transaction_trace = trace.transaction_trace(self, maximum)

            internal_metric(
                'Supportability/StatsEngine/Counts/'
                'transaction_sample_data', trace.trace_node_count)

            data = [transaction_trace, trace.string_table.values()]

            if self.__settings.debug.log_transaction_trace_payload:
                _logger.debug(
                    'Encoding slow transaction data where '
                    'payload=%r.', data)

            with InternalTrace('Supportability/StatsEngine/JSON/Encode/'
                               'transaction_sample_data'):

                json_data = simplejson.dumps(data,
                                             ensure_ascii=True,
                                             encoding='Latin-1',
                                             namedtuple_as_object=False,
                                             default=lambda o: list(iter(o)))

            internal_metric(
                'Supportability/StatsEngine/ZLIB/Bytes/'
                'transaction_sample_data', len(json_data))

            with InternalTrace('Supportability/StatsEngine/ZLIB/Compress/'
                               'transaction_sample_data'):
                zlib_data = zlib.compress(json_data)

            with InternalTrace('Supportability/StatsEngine/BASE64/Encode/'
                               'transaction_sample_data'):
                pack_data = base64.standard_b64encode(zlib_data)

            root = transaction_trace.root
            xray_id = getattr(trace, 'xray_id', None)

            if (xray_id or trace.rum_trace or trace.record_tt):
                force_persist = True
            else:
                force_persist = False

            trace_data.append([
                root.start_time,
                root.end_time - root.start_time,
                trace.path,
                trace.request_uri,
                pack_data,
                trace.guid,
                None,
                force_persist,
                xray_id,
            ])

        return trace_data
Example #17
            'Calling data collector with url=%r, method=%r and '
            'payload=%r.', url, method, data)
    elif settings.debug.log_data_collector_calls:
        _logger.debug('Calling data collector with url=%r and method=%r.', url,
                      method)

    # Compress the serialized JSON being sent as content if over 64KiB
    # in size. If less than 2MB in size compress for speed. If over
    # 2MB then compress for smallest size. This parallels what the Ruby
    # agent does.

    if len(data) > 64 * 1024:
        headers['Content-Encoding'] = 'deflate'
        level = (len(data) < 2000000) and 1 or 9

        internal_metric('Supportability/Collector/ZLIB/Bytes/%s' % method,
                        len(data))

        with InternalTrace('Supportability/Collector/ZLIB/Compress/'
                           '%s' % method):
            data = zlib.compress(data, level)

    # If there is no requests session object provided for making
    # requests, create one now. We use a transient session to get around
    # design flaws in the requests/urllib3 modules. See the notes for the
    # close_requests_session() function above. Note that keep alive
    # must be set to true at this point to ensure that the pool is
    # actually used, which allows us to close the connection.

    auto_close_session = False

    if not session:
Example #18
    def transaction_trace_data(self):
        """Returns a list of slow transaction data collected
        during the reporting period.

        """
        if not self.__settings:
            return []

        # Create a set 'traces' that is the union of the slow transaction,
        # browser_transactions and xray_transactions. This ensures we don't
        # send duplicates of a transaction.

        traces = set()
        if self.__slow_transaction:
            traces.add(self.__slow_transaction)
        traces.update(self.__browser_transactions)
        traces.update(self.__xray_transactions)

        # Return an empty list if no transactions were captured.

        if not traces:
            return []

        trace_data = []
        maximum = self.__settings.agent_limits.transaction_traces_nodes

        for trace in traces:
            transaction_trace = trace.transaction_trace(self, maximum)

            internal_metric('Supportability/StatsEngine/Counts/'
                            'transaction_sample_data',
                            trace.trace_node_count)

            data = [transaction_trace,
                    list(trace.string_table.values())]

            if self.__settings.debug.log_transaction_trace_payload:
                _logger.debug('Encoding slow transaction data where '
                              'payload=%r.', data)

            with InternalTrace('Supportability/StatsEngine/JSON/Encode/'
                               'transaction_sample_data'):

                json_data = simplejson.dumps(data, ensure_ascii=True,
                        encoding='Latin-1', namedtuple_as_object=False,
                        default=lambda o: list(iter(o)))

            internal_metric('Supportability/StatsEngine/ZLIB/Bytes/'
                            'transaction_sample_data', len(json_data))

            with InternalTrace('Supportability/StatsEngine/ZLIB/Compress/'
                               'transaction_sample_data'):
                zlib_data = zlib.compress(six.b(json_data))

            with InternalTrace('Supportability/StatsEngine/BASE64/Encode/'
                               'transaction_sample_data'):
                pack_data = base64.standard_b64encode(zlib_data)

            root = transaction_trace.root
            xray_id = getattr(trace, 'xray_id', None)

            force_persist = bool(xray_id or trace.rum_trace or trace.record_tt)

            trace_data.append([
                root.start_time,
                root.end_time - root.start_time,
                trace.path,
                trace.request_uri,
                pack_data,
                trace.guid,
                None,
                force_persist,
                xray_id,
            ])

        return trace_data
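
The pack_data payload above is built by JSON encoding the trace, deflating it with zlib and then base64 encoding the result. A minimal round-trip sketch of that pipeline is shown below, using the standard library json module instead of simplejson and omitting the namedtuple and iterator handling; it is illustrative only.

import base64
import json
import zlib

def pack_sample(data):
    # JSON encode, deflate, then base64 so the payload stays ASCII safe.
    json_data = json.dumps(data, ensure_ascii=True)
    return base64.standard_b64encode(zlib.compress(json_data.encode('latin-1')))

def unpack_sample(pack_data):
    # Reverse of pack_sample(): base64 decode, inflate, then JSON decode.
    return json.loads(zlib.decompress(base64.standard_b64decode(pack_data)))

assert unpack_sample(pack_sample({'path': '/index'})) == {'path': '/index'}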
Exemplo n.º 19
    def harvest(self, shutdown=False):
        """Performs a harvest, reporting aggregated data for the current
        reporting period to the data collector.

        """

        if self._agent_shutdown:
            return

        if not self._active_session:
            _logger.debug('Cannot perform a data harvest for %r as '
                    'there is no active session.', self._app_name)

            return

        internal_metrics = ValueMetrics()

        with InternalTraceContext(internal_metrics):
            with InternalTrace('Supportability/Harvest/Calls/harvest'):

                self._harvest_count += 1

                start = time.time()

                _logger.debug('Commencing data harvest for %r.',
                        self._app_name)

                # Create a snapshot of the transaction stats and
                # application specific custom metrics stats, then merge
                # them together. The originals will be reset at the time
                # this is done so that any new metrics that come in from
                # this point onwards will be accumulated in a fresh
                # bucket.

                transaction_count = self._transaction_count

                with self._stats_lock:
                    self._transaction_count = 0
                    self._last_transaction = 0.0

                    stats = self._stats_engine.harvest_snapshot()

                with self._stats_custom_lock:
                    stats_custom = self._stats_custom_engine.harvest_snapshot()

                stats.merge_metric_stats(stats_custom)

                # Now merge in any metrics from the data samplers
                # associated with this application.
                #
                # NOTE If a data sampler has problems then whatever data
                # was collected up to that point is retained. The data
                # sampler itself is still retained and would be used
                # again on a future harvest. If it is a persistent problem
                # with the data sampler the issue would then reoccur
                # with every harvest. If the data sampler is user
                # provided, it should perhaps be deregistered if it keeps
                # having problems.

                for sampler in self._samplers:
                    try:
                        for metric in sampler.value_metrics():
                            stats.record_value_metric(metric)

                    except Exception:
                        _logger.exception('The merging of value metrics from '
                                'a data sampler has failed. If this issue '
                                'persists then please report this problem to '
                                'New Relic support for further investigation.')

                # Add a metric we can use to track how many harvest
                # periods have occurred.

                stats.record_value_metric(ValueMetric(
                        name='Instance/Reporting', value=0))

                # Create our timestamp for when this reporting period
                # ends and start reporting the data.

                period_end = time.time()

                # If this harvest is being forcibly triggered on process
                # shutdown, there are transactions recorded, and the
                # duration of the harvest period is less than 1 second,
                # then artificially push out the end time of the harvest
                # period. This is done so that the harvest period is not
                # less than 1 second, otherwise the data collector will
                # throw the data away. This is desirable for the case where
                # we are trying to monitor scripts which perform a one off
                # task and then immediately exit. It is also useful when
                # running test scripts.

                if shutdown and transaction_count != 0:
                    if period_end - self._period_start < 1.0:
                        _logger.debug('Stretching harvest duration for '
                                'forced harvest on shutdown.')
                        period_end = self._period_start + 1.001

                try:
                    configuration = self._active_session.configuration

                    # Send the transaction and custom metric data.

                    # Create a metric_normalizer based on normalize_name
                    # If metric rename rules are empty, set normalizer
                    # to None and the stats engine will skip steps as
                    # appropriate.

                    if self._rules_engine['metric'].rules:
                        metric_normalizer = partial(self.normalize_name,
                                rule_type='metric')
                    else:
                        metric_normalizer = None

                    # Pass the metric_normalizer to stats.metric_data to
                    # do metric renaming.

                    metric_data = stats.metric_data(metric_normalizer)

                    internal_metric('Supportability/Harvest/Counts/'
                            'metric_data', len(metric_data))

                    metric_ids = self._active_session.send_metric_data(
                      self._period_start, period_end, metric_data)

                    # Successful, so we update the stats engine with the
                    # new metric IDs and reset the reporting period
                    # start time. If an error occurs after this point,
                    # any remaining data for the period being reported
                    # on will be thrown away. We reset the count of the
                    # number of merges we have done due to failures, as we
                    # only really want to count errors in being able to
                    # report the main transaction metrics.

                    self._merge_count = 0
                    self._period_start = period_end
                    self._stats_engine.update_metric_ids(metric_ids)

                    # Send the accumulated error data.

                    if configuration.collect_errors:
                        error_data = stats.error_data()

                        internal_metric('Supportability/Harvest/Counts/'
                                'error_data', len(error_data))

                        if error_data:
                            self._active_session.send_errors(error_data)

                    if configuration.collect_traces:
                        slow_sql_data = stats.slow_sql_data()

                        internal_metric('Supportability/Harvest/Counts/'
                                'sql_trace_data', len(slow_sql_data))

                        if slow_sql_data:
                            self._active_session.send_sql_traces(slow_sql_data)

                        slow_transaction_data = stats.transaction_trace_data()

                        internal_metric('Supportability/Harvest/Counts/'
                                'transaction_sample_data',
                                len(slow_transaction_data))

                        if slow_transaction_data:
                            self._active_session.send_transaction_traces(
                                    slow_transaction_data)

                    # Get agent commands from collector.

                    agent_commands = self._active_session.get_agent_commands()

                    # For each agent command received, call the
                    # appropriate agent command handler. Reply to the
                    # data collector with the acknowledgement of the
                    # agent command.

                    for command in agent_commands:
                        cmd_id = command[0]
                        cmd_name = command[1]['name']
                        cmd_args = command[1]['arguments']

                        # An agent command is mapped to a method of this
                        # class. If we don't know about a specific agent
                        # command we just ignore it.

                        cmd_handler = getattr(self, cmd_name, None)

                        if cmd_handler is None:
                            _logger.debug('Received unknown agent command '
                                    '%r from the data collector for %r.',
                                    cmd_name, self._app_name)
                            continue

                        cmd_res = cmd_handler(cmd_id, **cmd_args)

                        if cmd_res:
                            self._active_session.send_agent_command_results(
                                    cmd_res)

                    # If a profiling session is already running, check
                    # if it is completed and send the accumulated
                    # profile data back to the data collector. Note that
                    # this comes after we process the agent commands, as
                    # we might receive an agent command to stop the
                    # profiling session but still want to send the data
                    # back. Having the sending of the results last ensures
                    # we send back the data from the stopped profiling
                    # session immediately.

                    if self._profiler_started:
                        profile_data = self._thread_profiler.profile_data()

                        if profile_data and self._send_profile_data:
                            _logger.debug('Reporting thread profiling '
                                    'session data for %r.', self._app_name)

                            self._active_session.send_profile_data(profile_data)

                            self._profiler_started = False
                            self._send_profile_data = False

                    # If this is a final forced harvest for the process
                    # then attempt to shut down the session.
                    #
                    # If a thread profiling session is running, we need
                    # to make sure we stop that from running as well.

                    if shutdown:
                        if self._profiler_started:
                            _logger.info('Aborting thread profiling session '
                                    'for %r.', self._app_name)

                            self._thread_profiler.stop_profiling(
                                    wait_for_completion=False)

                            self._thread_profiler = None
                            self._profiler_started = False
                            self._send_profile_data = False

                        try:
                            self._active_session.shutdown_session()
                        except Exception:
                            pass

                        self._active_session = None

                except ForceAgentRestart:
                    # The data collector has indicated that we need to
                    # perform an internal agent restart. We attempt to
                    # properly shut down the session and then initiate a
                    # new session.
                    #
                    # If a thread profiling session is running, we need
                    # to make sure we stop it as well, since any data it
                    # collected could not be reported later anyway; once
                    # we reconnect it will be a different agent run.

                    if self._profiler_started:
                        _logger.info('Aborting thread profiling session '
                                'for %r.', self._app_name)

                        self._thread_profiler.stop_profiling(
                                wait_for_completion=False)

                        self._thread_profiler = None
                        self._profiler_started = False
                        self._send_profile_data = False

                    try:
                        self._active_session.shutdown_session()
                    except Exception:
                        pass

                    self._agent_restart += 1
                    self._active_session = None

                    self.activate_session()

                except ForceAgentDisconnect:
                    # The data collector has indicated that we need to
                    # force disconnect and stop reporting. We attempt to
                    # properly shut down the session, but don't start a
                    # new one, and flag ourselves as shut down. This
                    # notification is presumably sent when a specific
                    # application is behaving so badly that it needs to
                    # be stopped entirely. It would require a complete
                    # process restart to be able to attempt to connect
                    # again, and if the server side kill switch is still
                    # enabled it would be told to disconnect once more.
                    #
                    # If a thread profiling session is running, we need
                    # to make sure we stop it as well, since the agent
                    # will no longer be reporting without a restart of
                    # the process, so there is no point continuing it.

                    if self._profiler_started:
                        _logger.info('Aborting thread profiling session '
                                'for %r.', self._app_name)

                        self._thread_profiler.stop_profiling(
                                wait_for_completion=False)

                        self._thread_profiler = None
                        self._profiler_started = False
                        self._send_profile_data = False

                    try:
                        self._active_session.shutdown_session()
                    except Exception:
                        pass

                    self._active_session = None

                    self._agent_shutdown = True

                except RetryDataForRequest:
                    # A potentially recoverable error occurred. We merge
                    # the stats back into those for the current period
                    # and abort the current harvest if the problem
                    # occurred when initially reporting the main
                    # transaction metrics. If the problem occurred when
                    # reporting other information then that and any
                    # other unreported information is thrown away.
                    #
                    # In order to prevent memory growth we will only
                    # merge data up to a set maximum number of
                    # successive times. When that limit is reached we
                    # throw away all the metric data and start over. We
                    # also only merge main metric data and discard
                    # errors, slow SQL and transaction traces from the
                    # older harvest period.

                    if self._period_start != period_end:

                        self._merge_count += 1

                        agent_limits = configuration.agent_limits
                        maximum = agent_limits.merge_stats_maximum

                        if self._merge_count <= maximum:
                            self._stats_engine.merge_metric_stats(stats)

                        else:
                            _logger.error('Unable to report main transaction '
                                    'metrics after %r successive attempts. '
                                    'Check the log messages and if necessary '
                                    'please report this problem to New Relic '
                                    'support for further investigation.',
                                    maximum)

                            self._discard_count += self._merge_count

                            self._merge_count = 0

                except DiscardDataForRequest:
                    # An issue must have occurred in reporting the data,
                    # but if we retried with the same data the same error
                    # would likely occur again, so we just throw away any
                    # data not yet sent for this reporting period.

                    self._discard_count += 1

                except Exception:
                    # An unexpected error, likely some sort of internal
                    # agent implementation issue.

                    _logger.exception('Unexpected exception when attempting '
                            'to harvest the metric data and send it to the '
                            'data collector. Please report this problem to '
                            'New Relic support for further investigation.')

                duration = time.time() - start

                _logger.debug('Completed harvest for %r in %.2f seconds.',
                        self._app_name, duration)

                # Force close the socket connection which has been
                # created for this harvest if the session still exists.
                # A new connection will be created automatically on the
                # next harvest.

                if self._active_session:
                    self._active_session.close_connection()

        # Merge back in statistics recorded about the last harvest
        # and communication with the data collector. This will be
        # part of the data for the next harvest period.

        with self._stats_lock:
            self._stats_engine.merge_value_metrics(internal_metrics.metrics())
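
The agent command handling inside harvest() looks up a method on the application object by the command name via getattr and silently ignores unknown names. A minimal standalone sketch of that dispatch pattern follows; the class, the example command name and the result shape are all illustrative, not the agent's actual command set.

class CommandDispatcher(object):
    """Illustrative getattr-based command dispatch, mirroring harvest()."""

    def start_thing(self, cmd_id, **cmd_args):
        # A real handler would act on the command; here we simply
        # acknowledge it by returning a result keyed by the command id.
        return {cmd_id: {}}

    def dispatch(self, agent_commands):
        results = []
        for cmd_id, details in agent_commands:
            handler = getattr(self, details['name'], None)
            if handler is None:
                continue  # Unknown commands are ignored.
            result = handler(cmd_id, **details['arguments'])
            if result:
                results.append(result)
        return results

dispatcher = CommandDispatcher()
dispatcher.dispatch([(7, {'name': 'start_thing', 'arguments': {}}),
                     (8, {'name': 'unknown', 'arguments': {}})])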
Exemplo n.º 20
    if settings.debug.log_data_collector_payloads:
        _logger.debug('Calling data collector with method=%r and '
                'payload=%r.', method, data)
    elif settings.debug.log_data_collector_calls:
        _logger.debug('Calling data collector with method=%r.', method)

    # Compress the serialized JSON being sent as content if over 64KiB
    # in size. If less than 2MB in size compress for speed. If over
    # 2MB then compress for smallest size. This parallels what the Ruby
    # agent does.

    if len(data) > 64*1024:
        headers['Content-Encoding'] = 'deflate'
        level = 1 if len(data) < 2000000 else 9

        internal_metric('Supportability/Collector/ZLIB/Bytes/%s' % method,
                len(data))

        with InternalTrace('Supportability/Collector/ZLIB/Compress/'
                '%s' % method):
            data = zlib.compress(data, level)

    # If there is no requests session object for the socket connection,
    # create one now. We use a session object rather than a direct post
    # as we want to ensure that the socket connection isn't kept alive
    # by requests.

    if not session:
        session_config = {}
        session_config['keep_alive'] = True
        session_config['pool_connections'] = 1
        session_config['pool_maxsize'] = 1