def record_transaction(self, data):
    """Record a single transaction against this application.

    The metrics for the transaction are generated into a separate
    workarea first and only then merged into the main stats engine
    while holding the stats lock.

    Nothing is recorded when there is no active session or when the
    stats engine has no settings. Exceptions raised while generating
    or merging the data are logged rather than propagated.

    The 'data' argument is the transaction being recorded. It is
    passed to the workarea and its 'end_time' attribute is stored as
    the time of the last transaction.

    """

    if not self._active_session:
        return

    if self._stats_engine.settings is None:
        return

    # Do checks to see whether trying to record a transaction in a
    # different process to that the application was activated in.

    self.validate_process()

    internal_metrics = ValueMetrics()

    with InternalTraceContext(internal_metrics):
        # Bind the name up front so the merge step below can tell
        # when the workarea was never created, instead of failing on
        # an unbound local and logging a second, misleading error.

        stats = None

        try:
            # We accumulate stats into a workarea and only then merge
            # it into the main one under a thread lock. Do this to
            # ensure that the process of generating the metrics into
            # the stats doesn't unnecessarily lock out another thread.

            stats = self._stats_engine.create_workarea()
            stats.record_transaction(data)

        except Exception:
            _logger.exception('The generation of transaction data has '
                    'failed. This would indicate some sort of internal '
                    'implementation issue with the agent. Please report '
                    'this problem to New Relic support for further '
                    'investigation.')

        with self._stats_lock:
            try:
                self._transaction_count += 1
                self._last_transaction = data.end_time

                # If generation failed part way through, whatever made
                # it into the workarea is still merged. Only when no
                # workarea exists at all is this step skipped.

                if stats is not None:
                    internal_metric('Supportability/Transaction/Counts/'
                            'metric_data', stats.metric_data_count())

                    self._stats_engine.merge_metric_stats(stats)
                    self._stats_engine.merge_other_stats(stats)

                # Merge the internal metrics gathered for this call as
                # well. Nothing further should be recorded as an
                # internal metric after this point, as it would not be
                # part of what gets merged here.

                self._stats_engine.merge_value_metrics(
                        internal_metrics.metrics())

            except Exception:
                _logger.exception('The merging of transaction data has '
                        'failed. This would indicate some sort of '
                        'internal implementation issue with the agent. '
                        'Please report this problem to New Relic support '
                        'for further investigation.')
def harvest(self, shutdown=False):
    """Performs a harvest, reporting aggregated data for the current
    reporting period to the data collector.

    Does nothing if the agent has been flagged as shutdown or there is
    no active session.

    If 'shutdown' is true this is treated as the final forced harvest
    for the process. A harvest period shorter than one second in which
    transactions were recorded is stretched to just over one second,
    and once the data has been sent the session is shut down and
    dropped.

    Exceptions raised while sending data are handled here and are not
    propagated to the caller:

    * ForceAgentRestart - the session is shut down and a new one is
      activated.
    * ForceAgentDisconnect - the session is shut down and the agent is
      flagged as shutdown.
    * RetryDataForRequest - if the main metric data was not sent, it is
      merged back for the next harvest, up to a configured number of
      successive attempts.
    * DiscardDataForRequest - the unsent data is thrown away.
    * Any other exception - logged.

    """

    if self._agent_shutdown:
        return

    if not self._active_session:
        _logger.debug('Cannot perform a data harvest for %r as '
                'there is no active session.', self._app_name)
        return

    internal_metrics = ValueMetrics()

    with InternalTraceContext(internal_metrics):
        with InternalTrace('Supportability/Harvest/Calls/harvest'):

            self._harvest_count += 1

            start = time.time()

            _logger.debug('Commencing data harvest for %r.',
                    self._app_name)

            # Create a snapshot of the transaction stats and
            # application specific custom metrics stats, then merge
            # them together. The originals will be reset at the time
            # this is done so that any new metrics that come in from
            # this point onwards will be accumulated in a fresh
            # bucket.
            #
            # The transaction count is read under the same lock that
            # resets it, so that a transaction recorded by another
            # thread cannot fall between the read and the reset.

            with self._stats_lock:
                transaction_count = self._transaction_count

                self._transaction_count = 0
                self._last_transaction = 0.0

                stats = self._stats_engine.harvest_snapshot()

            with self._stats_custom_lock:
                stats_custom = self._stats_custom_engine.harvest_snapshot()

            stats.merge_metric_stats(stats_custom)

            # Now merge in any metrics from the data samplers
            # associated with this application.
            #
            # NOTE If a data sampler has problems then what data was
            # collected up to that point is retained. The data
            # sampler itself is still retained and would be used
            # again on future harvest. If it is a persistent problem
            # with the data sampler the issue would then reoccur
            # with every harvest. If data sampler is a user provided
            # data sampler, then should perhaps deregister it if it
            # keeps having problems.

            for sampler in self._samplers:
                try:
                    for metric in sampler.value_metrics():
                        stats.record_value_metric(metric)

                except Exception:
                    _logger.exception('The merging of value metrics from '
                            'a data sampler has failed. If this issue '
                            'persists then please report this problem to '
                            'New Relic support for further investigation.')

            # Add a metric we can use to track how many harvest
            # periods have occurred.

            stats.record_value_metric(ValueMetric(
                    name='Instance/Reporting', value=0))

            # Create our time stamp as to when this reporting period
            # ends and start reporting the data.

            period_end = time.time()

            # If this harvest is being forcibly triggered on process
            # shutdown, there are transactions recorded, and the
            # duration of the harvest period is less than 1 second,
            # then artificially push out the end time of the harvest
            # period. This is done so that the harvest period is not
            # less than 1 second, otherwise the data collector will
            # throw the data away. This is desirable for case where
            # trying to monitor scripts which perform a one off task
            # and then immediately exit. Also useful when running
            # test scripts.

            if shutdown and transaction_count != 0:
                if period_end - self._period_start < 1.0:
                    _logger.debug('Stretching harvest duration for '
                            'forced harvest on shutdown.')
                    period_end = self._period_start + 1.001

            try:
                configuration = self._active_session.configuration

                # Send the transaction and custom metric data.

                # Create a metric_normalizer based on normalize_name.
                # If metric rename rules are empty, set normalizer
                # to None and the stats engine will skip steps as
                # appropriate.

                if self._rules_engine['metric'].rules:
                    metric_normalizer = partial(self.normalize_name,
                            rule_type='metric')
                else:
                    metric_normalizer = None

                # Pass the metric_normalizer to stats.metric_data to
                # do metric renaming.

                metric_data = stats.metric_data(metric_normalizer)

                internal_metric('Supportability/Harvest/Counts/'
                        'metric_data', len(metric_data))

                metric_ids = self._active_session.send_metric_data(
                        self._period_start, period_end, metric_data)

                # Successful, so we update the stats engine with the
                # new metric IDs and reset the reporting period
                # start time. If an error occurs after this point,
                # any remaining data for the period being reported
                # on will be thrown away. We reset the count of
                # number of merges we have done due to failures as
                # only really want to count errors in being able to
                # report the main transaction metrics.

                self._merge_count = 0
                self._period_start = period_end
                self._stats_engine.update_metric_ids(metric_ids)

                # Send the accumulated error data.

                if configuration.collect_errors:
                    error_data = stats.error_data()

                    internal_metric('Supportability/Harvest/Counts/'
                            'error_data', len(error_data))

                    if error_data:
                        self._active_session.send_errors(error_data)

                # Send the accumulated slow SQL and transaction
                # trace data.

                if configuration.collect_traces:
                    slow_sql_data = stats.slow_sql_data()

                    internal_metric('Supportability/Harvest/Counts/'
                            'sql_trace_data', len(slow_sql_data))

                    if slow_sql_data:
                        self._active_session.send_sql_traces(slow_sql_data)

                    slow_transaction_data = stats.transaction_trace_data()

                    internal_metric('Supportability/Harvest/Counts/'
                            'transaction_sample_data',
                            len(slow_transaction_data))

                    if slow_transaction_data:
                        self._active_session.send_transaction_traces(
                                slow_transaction_data)

                # Get agent commands from collector.

                agent_commands = self._active_session.get_agent_commands()

                # For each agent command received, call the
                # appropriate agent command handler. Reply to the
                # data collector with the acknowledgement of the
                # agent command.

                for command in agent_commands:
                    cmd_id = command[0]
                    cmd_name = command[1]['name']
                    cmd_args = command[1]['arguments']

                    # An agent command is mapped to a method of this
                    # class. If we don't know about a specific agent
                    # command we just ignore it.
                    #
                    # NOTE(review): the name comes straight from the
                    # data collector and any attribute of this object
                    # with that name will match, whether or not it is
                    # meant to be a command handler. Consider an
                    # explicit list of allowed command names.

                    cmd_handler = getattr(self, cmd_name, None)

                    if cmd_handler is None:
                        _logger.debug('Received unknown agent command '
                                '%r from the data collector for %r.',
                                cmd_name, self._app_name)
                        continue

                    cmd_res = cmd_handler(cmd_id, **cmd_args)

                    if cmd_res:
                        self._active_session.send_agent_command_results(
                                cmd_res)

                # If a profiling session is already running, check
                # if it is completed and send the accumulated
                # profile data back to the data collector. Note that
                # this comes after we process the agent commands as
                # we might receive an agent command to stop the
                # profiling session, but still send the data back.
                # Having the sending of the results last ensures we
                # send back that data from the stopped profiling
                # session immediately.

                if self._profiler_started:
                    profile_data = self._thread_profiler.profile_data()

                    if profile_data and self._send_profile_data:
                        _logger.debug('Reporting thread profiling '
                                'session data for %r.', self._app_name)

                        self._active_session.send_profile_data(profile_data)

                        self._profiler_started = False
                        self._send_profile_data = False

                # If this is a final forced harvest for the process
                # then attempt to shutdown the session.
                #
                # If a thread profiling session is running, we need
                # to make sure we stop that from running as well.

                if shutdown:
                    self._abort_profiler_and_drop_session()

            except ForceAgentRestart:
                # The data collector has indicated that we need to
                # perform an internal agent restart. We attempt to
                # properly shutdown the session and then initiate a
                # new session.
                #
                # If a thread profiling session is running, we need
                # to make sure we stop that from running as well as
                # any data will not be able to be reported later if
                # do reconnect as will be a different agent run.

                self._abort_profiler_and_drop_session()

                self._agent_restart += 1

                self.activate_session()

            except ForceAgentDisconnect:
                # The data collector has indicated that we need to
                # force disconnect and stop reporting. We attempt to
                # properly shutdown the session, but don't start a
                # new one and flag ourselves as shutdown. This
                # notification is presumably sent when a specific
                # application is behaving so badly that it needs to
                # be stopped entirely. It would require a complete
                # process start to be able to attempt to connect
                # again and if the server side kill switch is still
                # enabled it would be told to disconnect once more.
                #
                # If a thread profiling session is running, we need
                # to make sure we stop that from running as well as
                # the agent will no longer be reporting without a
                # restart of the process so no point.

                self._abort_profiler_and_drop_session()

                self._agent_shutdown = True

            except RetryDataForRequest:
                # A potentially recoverable error occurred. We merge
                # the stats back into that for the current period
                # and abort the current harvest if the problem
                # occurred when initially reporting the main
                # transaction metrics. If the problem occurred when
                # reporting other information then that and any
                # other non reported information is thrown away.
                #
                # In order to prevent memory growth we will only
                # merge data up to a set maximum number of
                # successive times. When this occurs we throw away
                # all the metric data and start over. We also only
                # merge main metric data and discard errors, slow
                # SQL and transaction traces from older harvest
                # period.
                #
                # The period start only gets moved to the period end
                # once the main metric data has been sent, so if they
                # differ the main metric data was not reported.

                if self._period_start != period_end:

                    self._merge_count += 1

                    agent_limits = configuration.agent_limits
                    maximum = agent_limits.merge_stats_maximum

                    if self._merge_count <= maximum:
                        # Other threads may be merging transactions
                        # into the stats engine at the same time, so
                        # hold the stats lock as is done everywhere
                        # else the stats engine is updated.

                        with self._stats_lock:
                            self._stats_engine.merge_metric_stats(stats)

                    else:
                        _logger.error('Unable to report main transaction '
                                'metrics after %r successive attempts. '
                                'Check the log messages and if necessary '
                                'please report this problem to New Relic '
                                'support for further investigation.',
                                maximum)

                        self._discard_count += self._merge_count

                        self._merge_count = 0

            except DiscardDataForRequest:
                # An issue must have occurred in reporting the data
                # but if we retry with same data the same error is
                # likely to occur again so we just throw any data
                # not sent away for this reporting period.

                self._discard_count += 1

            except Exception:
                # An unexpected error, likely some sort of internal
                # agent implementation issue.

                _logger.exception('Unexpected exception when attempting '
                        'to harvest the metric data and send it to the '
                        'data collector. Please report this problem to '
                        'New Relic support for further investigation.')

            duration = time.time() - start

            _logger.debug('Completed harvest for %r in %.2f seconds.',
                    self._app_name, duration)

            # Force close the socket connection which has been
            # created for this harvest if session still exists.
            # New connection will be created automatically on the
            # next harvest.

            if self._active_session:
                self._active_session.close_connection()

    # Merge back in statistics recorded about the last harvest
    # and communication with the data collector. This will be
    # part of the data for the next harvest period.

    with self._stats_lock:
        self._stats_engine.merge_value_metrics(internal_metrics.metrics())

def _abort_profiler_and_drop_session(self):
    """Stops any running thread profiling session, attempts to shut
    down the active session and then drops the reference to it.

    A failure to shut down the session is logged at debug level and
    otherwise ignored, as the session is being discarded regardless.

    """

    if self._profiler_started:
        _logger.info('Aborting thread profiling session '
                'for %r.', self._app_name)

        self._thread_profiler.stop_profiling(
                wait_for_completion=False)

        self._thread_profiler = None
        self._profiler_started = False
        self._send_profile_data = False

    try:
        self._active_session.shutdown_session()

    except Exception:
        _logger.debug('Ignoring exception raised when shutting down '
                'the session for %r.', self._app_name, exc_info=True)

    self._active_session = None