class ReprocessingOneRabbitMQCrashStore(ReprocessingRabbitMQCrashStore):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )

    def reprocess(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success
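# Illustrative usage sketch (editor's addition): queueing crash IDs for
# reprocessing.  The pre-built configman `config` and the crash IDs are
# hypothetical; only the reprocess() call is defined by the class above.
def example_reprocess_batch(config):
    store = ReprocessingOneRabbitMQCrashStore(config)
    ok = store.reprocess([
        '00000000-0000-0000-0000-000000160101',
        '11111111-1111-1111-1111-111111160101',
    ])
    if not ok:
        config.logger.warning('some crash ids could not be queued')
    return ok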
class JitCrashCategorizeRule(ExternalProcessRule):
    required_config = Namespace()
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/jit-crash-categorize',
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'classifications.jit.category',
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'classifications.jit.category_return_code',
    )
    required_config.add_option(
        'threshold',
        doc="max number of frames until encountering target frame",
        default=8
    )

    #--------------------------------------------------------------------------
    def __init__(self, config):
        super(JitCrashCategorizeRule, self).__init__(config)

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        if (
            processed_crash.product != 'Firefox' or
            not processed_crash.os_name.startswith('Windows') or
            processed_crash.cpu_name != 'x86'
        ):
            # we don't want any of these
            return False
        frames = processed_crash.json_dump['crashing_thread']['frames']
        if frames[0].get('module', False):
            # there is a module at the top of the stack; we don't want this
            return False
        return (
            processed_crash.signature.endswith('EnterBaseline') or
            processed_crash.signature.endswith('EnterIon')
        )

    #--------------------------------------------------------------------------
    def _interpret_external_command_output(self, fp, processor_meta):
        try:
            result = fp.read()
        except IOError as x:
            processor_meta.processor_notes.append(
                "%s unable to read external command output: %s" % (
                    self.config.command_pathname,
                    x
                )
            )
            return ''
        try:
            return result.strip()
        except AttributeError:
            # the output has no strip method; return it unchanged
            return result
class CorrelationInterestingAddonsVersionsRule(
    CorrelationInterestingModulesRule
):
    required_config = Namespace()
    required_config.addons = change_default(
        CorrelationInterestingModulesRule,
        'addons',
        True
    )
    required_config.show_versions = change_default(
        CorrelationInterestingModulesRule,
        'show_versions',
        True
    )
class ReprocessingRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.routing_key = change_default(
        RabbitMQCrashStorage,
        'routing_key',
        'socorro.reprocessing'
    )
    required_config.filter_on_legacy_processing = change_default(
        RabbitMQCrashStorage,
        'filter_on_legacy_processing',
        False
    )
class RegionalS3ConnectionContext(S3ConnectionContext):
    """This derived class forces you to connect to a specific region,
    which means we can use OrdinaryCallingFormat as the calling format
    and then we'll be able to connect to S3 buckets with periods in
    their names.
    """
    required_config = Namespace()
    required_config.add_option(
        'region',
        doc="Name of the S3 region (e.g. us-west-2)",
        default='us-west-2',
        reference_value_from='resource.boto',
    )
    required_config.calling_format = change_default(
        S3ConnectionContext,
        'calling_format',
        'boto.s3.connection.OrdinaryCallingFormat'
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(RegionalS3ConnectionContext, self).__init__(config)
        self._region = config.region
        self._connect_to_endpoint = boto.s3.connect_to_region

    #--------------------------------------------------------------------------
    def _connect(self):
        try:
            return self.connection
        except AttributeError:
            self.connection = self._connect_to_endpoint(
                self._region,
                **self._get_credentials()
            )
            return self.connection
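# Illustrative sketch (editor's addition): why OrdinaryCallingFormat matters.
# boto's default subdomain-style addressing breaks TLS hostname matching for
# bucket names containing periods; connecting to a fixed region with
# path-style (ordinary) addressing avoids that.  The bucket name and
# credentials here are hypothetical.
import boto.s3
from boto.s3.connection import OrdinaryCallingFormat

def example_open_bucket(aws_access_key_id, aws_secret_access_key):
    connection = boto.s3.connect_to_region(
        'us-west-2',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        calling_format=OrdinaryCallingFormat(),
    )
    return connection.get_bucket('org.example.crash-stats')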
class SocorroLiteProcessorAlgorithm2015(Processor2015):
    """this is the class that the processor uses to transform crashes"""
    required_config = Namespace()
    required_config.rule_sets = change_default(
        Processor2015,
        'rule_sets',
        ujson.dumps(socorrolite_processor_rule_sets)
    )
def test_change_default(self):
    class Alpha(RequiredConfig):
        required_config = Namespace()
        required_config.add_option(
            'an_option',
            default=19,
            doc='this is an an_option',
            from_string_converter=str,
        )
    a_new_option_with_a_new_default = change_default(
        Alpha,
        'an_option',
        '29300'
    )
    ok_(
        a_new_option_with_a_new_default is not Alpha.required_config.an_option
    )
    eq_(a_new_option_with_a_new_default.default, '29300')
    eq_(Alpha.required_config.an_option.default, 19)
class PriorityjobRabbitMQCrashStore(RabbitMQCrashStorage):
    required_config = Namespace()
    required_config.rabbitmq_class = change_default(
        RabbitMQCrashStorage,
        'rabbitmq_class',
        ConnectionContext,
    )
    required_config.add_option(
        'routing_key',
        default='socorro.priority',
        doc='the name of the queue to receive crashes',
    )

    def process(self, crash_ids):
        if not isinstance(crash_ids, (list, tuple)):
            crash_ids = [crash_ids]
        success = bool(crash_ids)
        for crash_id in crash_ids:
            if not self.save_raw_crash(
                DotDict({'legacy_processing': 0}),
                [],
                crash_id
            ):
                success = False
        return success
class JsonFileOutputForCoreCounts(FileOutputForCoreCounts):
    required_config = Namespace()
    required_config.path_template = change_default(
        FileOutputForCoreCounts,
        'path_template',
        '{path}/{prefix}/{prefix}_{key}-{name}.json',
    )

    #--------------------------------------------------------------------------
    def output_correlations_to_stream(self, counts_summary_structure, stream):
        json.dump(counts_summary_structure, stream, indent=4, sort_keys=True)
class DumpLookupExternalRule(ExternalProcessRule):
    required_config = Namespace()
    required_config.add_option(
        'dump_field',
        doc='the default name of a dump',
        default='upload_file_minidump',
    )
    required_config.add_option(
        'processor_symbols_pathname_list',
        doc='comma or space separated list of symbol files just as for '
            'minidump_stackwalk (quote paths with embedded spaces)',
        default='/mnt/socorro/symbols/symbols_ffx,'
                '/mnt/socorro/symbols/symbols_sea,'
                '/mnt/socorro/symbols/symbols_tbrd,'
                '/mnt/socorro/symbols/symbols_sbrd,'
                '/mnt/socorro/symbols/symbols_os',
        from_string_converter=_create_symbol_path_str
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/dump-lookup'
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '{dumpfile_pathname} '
        '{processor_symbols_pathname_list} '
        '2>/dev/null'
    )
    required_config.result_key = change_default(
        ExternalProcessRule,
        'result_key',
        'dump_lookup'
    )
    required_config.return_code_key = change_default(
        ExternalProcessRule,
        'return_code_key',
        'dump_lookup_return_code'
    )

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash,
                   processor_meta):
        return 'create_dump_lookup' in raw_crash
class CountStackWalkerTimeoutKills(CountAnythingRuleBase):
    required_config = Namespace()
    required_config.rule_name = change_default(
        CountAnythingRuleBase,
        'rule_name',
        'stackwalker_timeout_kills'
    )

    #--------------------------------------------------------------------------
    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        # override me to check any condition within a raw or processed
        # crash, or even the state of the processor itself from proc_meta
        return reduce(
            lambda x, y: x or "SIGKILL" in y,
            proc_meta.processor_notes,
            False
        )
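# Editor's sketch (not part of the rule above): the reduce() predicate is an
# accumulated "or" over the processor notes.  An equivalent, arguably more
# idiomatic spelling, exercised here with hypothetical notes:
def _example_sigkill_predicate(processor_notes):
    return any("SIGKILL" in note for note in processor_notes)

assert _example_sigkill_predicate(
    ["MDSW terminated with SIGKILL due to timeout"]
)
assert not _example_sigkill_predicate(["everything went fine"])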
class ESCrashStorageRedactedJsonDump(ESCrashStorageRedactedSave):
    """This class stores redacted crash reports into Elasticsearch, but
    instead of removing the entire `json_dump`, it keeps only a subset of
    its keys.
    """
    required_config = Namespace()
    required_config.add_option(
        name="json_dump_whitelist_keys",
        doc="keys of the json_dump field to keep in the processed crash",
        default=[
            "largest_free_vm_block",
            "tiny_block_size",
            "write_combine_size",
        ],
        from_string_converter=list_converter,
    )
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in
        normal usage.
        """
        # Replace the `json_dump` with a subset.
        json_dump = processed_crash.get('json_dump', {})
        redacted_json_dump = {
            k: json_dump.get(k)
            for k in self.config.json_dump_whitelist_keys
        }
        processed_crash['json_dump'] = redacted_json_dump

        super(ESCrashStorageRedactedJsonDump, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )
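# Illustrative sketch (editor's addition): what the whitelist reduction in
# save_raw_and_processed() above does to a processed crash's json_dump.
# The input values are hypothetical.
example_whitelist = [
    "largest_free_vm_block",
    "tiny_block_size",
    "write_combine_size",
]
example_json_dump = {
    "largest_free_vm_block": "0x70000000",
    "tiny_block_size": 42,
    "write_combine_size": 4096,
    "threads": ["...large structure elided..."],
}
example_redacted = {
    k: example_json_dump.get(k)
    for k in example_whitelist
}
# example_redacted now contains only the three whitelisted keys; the bulky
# "threads" structure has been dropped.
assert "threads" not in example_redacted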
class PGPVNewCrashSource(PGQueryNewCrashSource):
    required_config = Namespace()
    required_config.crash_id_query = change_default(
        PGQueryNewCrashSource,
        'crash_id_query',
        "select uuid "
        "from reports_clean rc join product_versions pv "
        "    on rc.product_version_id = pv.product_version_id "
        "where "
        "%s <= date_processed and date_processed < %s "
        "and %s between pv.build_date and pv.sunset_date"
    )
    required_config.add_option(
        'date',
        doc="a date in the form YYYY-MM-DD",
        default=(utc_now() - timedelta(1)).date(),
        from_string_converter=string_to_datetime
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, name, quit_check_callback=None):
        super(PGPVNewCrashSource, self).__init__(
            config,
            name,
            quit_check_callback
        )
        self.data = (
            config.date,
            config.date + timedelta(1),  # add a day
            config.date
        )
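# Editor's note (illustrative): the three "%s" placeholders in crash_id_query
# are filled, in order, from the self.data tuple built in __init__, so for a
# hypothetical config.date of 2015-06-15 the effective constraints are:
#
#     '2015-06-15' <= date_processed and date_processed < '2015-06-16'
#     and '2015-06-15' between pv.build_date and pv.sunset_date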
class ESCrashStorageRedactedSave(ESCrashStorage):
    required_config = Namespace()
    required_config.namespace('es_redactor')
    required_config.es_redactor.add_option(
        name="redactor_class",
        doc="the name of the class that implements a 'redact' method",
        default='socorro.external.crashstorage_base.Redactor',
        from_string_converter=class_converter,
    )
    required_config.es_redactor.forbidden_keys = change_default(
        Redactor,
        "forbidden_keys",
        "json_dump, "
        "upload_file_minidump_flash1.json_dump, "
        "upload_file_minidump_flash2.json_dump, "
        "upload_file_minidump_browser.json_dump"
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(ESCrashStorageRedactedSave, self).__init__(
            config,
            quit_check_callback
        )
        self.redactor = config.es_redactor.redactor_class(config.es_redactor)
        self.config.logger.warning(
            "Beware, this crashstorage class is destructive to the "
            "processed crash - if you're using a polycrashstore you may "
            "find the modified processed crash saved to the other "
            "crashstores."
        )

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in
        normal usage.
        """
        self.redactor.redact(processed_crash)

        super(ESCrashStorageRedactedSave, self).save_raw_and_processed(
            raw_crash,
            dumps,
            processed_crash,
            crash_id
        )
class CorrelationInterestingModulesRule(CorrelationRule):
    """this class attempts to be a faithful reproduction of the function of
    the original dbaron "per-crash-interesting-modules.py" application,
    embodied as a Socorro TransformRule.

    Individual crashes will be offered to this rule by a Fetch Transform Save
    app through the "_action" method.  This class will examine the crash and
    build counters on an instance of a ProductVersionMapping.  The counter
    structure it builds looks like this:

        pv_counters[os_name*]
            .count
            .signatures[a_signature*]
                .count
                .modules[a_module*]
                    .count
                    .versions[a_version*] int
            .modules[a_module*]
                .count
                .versions[a_version*] int
    """
    required_config = Namespace()
    required_config.add_option(
        "show_versions",
        doc="Show data on module versions",
        default=False
    )
    required_config.add_option(
        "addons",
        doc="Tabulate addons (rather than modules)",
        default=False
    )
    required_config.add_option(
        "min_baseline_diff",
        doc="a floating point number",
        default=0.05
    )
    required_config.namespace('output')
    required_config.output.output_class = change_default(
        CorrelationRule,
        'output.output_class',
        'socorro.analysis.correlations.interesting_rule'
        '.FileOutputForInterestingModules',
        new_reference_value='global.correlations.interesting'
    )

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def __init__(self, config=None, quit_check_callback=None):
        super(CorrelationInterestingModulesRule, self).__init__(
            config,
            quit_check_callback
        )
        # note: the "producs" misspelling below is the attribute name as
        # defined by the base class, so it must be spelled that way here.
        for an_accumulator in (
            self.counters_for_all_producs_and_versions.values()
        ):
            an_accumulator.osyses = {}
        self.date_suffix = defaultdict(int)
        self.summary_names = {
            # (show_versions, addons)
            (False, False): 'interesting-modules',
            (True, False): 'interesting-modules-with-versions',
            (False, True): 'interesting-addons',
            (True, True): 'interesting-addons-with-versions',
        }

    #--------------------------------------------------------------------------
    def summary_name(self):
        return self.summary_names[(
            self.config.show_versions,
            self.config.addons,
        )]

    #--------------------------------------------------------------------------
    @staticmethod
    def contains_bare_address(a_signature):
        return re.search(r"\S+@0x[0-9a-fA-F]+$", a_signature) is not None

    #--------------------------------------------------------------------------
    @staticmethod
    def remove_bare_address_from_signature(a_signature):
        return re.sub(r"@0x[0-9a-fA-F]+$", "", a_signature)

    #--------------------------------------------------------------------------
    def _action(self, raw, dumps, crash, processor_meta):
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if "os_name" not in crash:
            # We have some bad crash reports.
            return False

        # give the names of the old algorithm's critical variables to their
        # variables in the new system
        try:
            osyses = self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].osyses
            self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].counter += 1
        except (AttributeError, KeyError):
            # why both types?  crashes can be represented by either the
            # Socorro or configman DotDict types, which raise different
            # exceptions on a missing key.
            osyses = {}
            self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].osyses = osyses
            self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].counter = 1
        options = self.config

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # unlike the "core count correlation report", this code from the
        # original script was refactored to help understand the structure of
        # the counters so that a generic summary structure could be made.
        # This allows output of the summary information to somewhere other
        # than stdout.
        #
        # the structure has been broken down into levels of regular dicts
        # and SocorroDotDicts.  The DotDicts have keys that are constant
        # and no more are added when new crashes come in.  The regular dicts
        # are keyed by variable things that come in with crashes.  In the
        # structure below, keys of DotDicts are shown as constants like
        # ".count" and ".modules".  The keys of the dicts are shown as the
        # name of a field with a * (to designate zero or more) inside square
        # brackets.
        #
        # the counters structure looks like this:
        #     pv_counters[os_name*]
        #         .count
        #         .signatures[a_signature*]
        #             .count
        #             .modules[a_module*]
        #                 .count
        #                 .versions[a_version*] int
        #         .modules[a_module*]
        #             .count
        #             .versions[a_version*] int
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        os_name = crash["os_name"]
        # The os_version field is way too specific on Linux, and we don't
        # have much Linux data anyway.
        if options.by_os_version and os_name != "Linux":
            os_name = os_name + " " + crash["os_version"]
        counters_for_an_os = osyses.setdefault(
            os_name,
            SocorroDotDict({
                "count": 0,
                "signatures": {},
                "modules": {},
            })
        )
        a_signature = crash["signature"]
        if self.contains_bare_address(a_signature):
            if options.condense:
                # Condense all signatures in a given DLL.
                a_signature = self.remove_bare_address_from_signature(
                    a_signature
                )
        if "reason" in crash and crash["reason"] is not None:
            a_signature = a_signature + "|" + crash["reason"]
        counters_for_a_signature = counters_for_an_os.signatures.setdefault(
            a_signature,
            SocorroDotDict({
                "count": 0,
                "modules": {},
            }),
        )
        list_of_counters = [counters_for_an_os, counters_for_a_signature]
        # increment both the os & signature counters
        for a_counter in list_of_counters:
            a_counter.count += 1

        for libname, version in self.generate_modules_or_addons(crash):
            # Increment the global count on osys and the per-signature count.
            for a_counter in list_of_counters:
                counters_for_modules = a_counter.modules.setdefault(
                    libname,
                    SocorroDotDict({
                        "count": 0,
                        "versions": defaultdict(int),
                    })
                )
                counters_for_modules.count += 1
                # Count versions of each module as well.
                counters_for_modules.versions[version] += 1
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return True

    #--------------------------------------------------------------------------
    def _summary_for_a_product_version_pair(self, a_pv_accumulator):
        """in the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined, it
        was printed to stdout.  Since we want various means of outputting
        the data, instead of printing to stdout this method saves the
        statistics in a "summary_structure".  That structure is later walked
        for printing or output to some future storage scheme.

        The summary structure looks like this:
            pv_summary
                .date_key  # the last six UUID characters present
                .notes  # any notes added by the algorithm to tell of problems
                .os_counters[os_name*]
                    .count
                    .signatures[a_signature*]
                        .count
                        .in_sig_ratio
                        .in_os_ratio
                        .in_os_count
                        .osys_count
                        .modules[a_module*]  # may be addons
                            .in_sig_ratio
                            .in_os_ratio
                            .in_os_count
                            .osys_count
                            .versions[a_version*]  # may be addon versions
                                .sig_ver_ratio
                                .sig_ver_count
                                .sig_count
                                .os_ver_ratio
                                .os_ver_count
                                .osys_count
                                .version
        """
        options = self.config
        pv_summary = SocorroDotDict({
            'notes': [],
        })
        if len(self.date_suffix) > 1:
            message = (
                "crashes from more than one day %s"
                % str(tuple(self.date_suffix.keys()))
            )
            pv_summary.notes.append(message)
        pv_summary.date_key = self.date_suffix.keys()[0]
        pv_summary.os_counters = {}

        MIN_CRASHES = self.config.min_crashes
        counters_for_multiple_os = a_pv_accumulator.osyses
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        for os_name in counters_for_multiple_os.keys():
            counters_for_an_os = counters_for_multiple_os[os_name]
            pv_summary.os_counters[os_name] = SocorroDotDict()
            pv_summary.os_counters[os_name].count = (
                counters_for_multiple_os[os_name].count
            )
            pv_summary.os_counters[os_name].signatures = {}
            filtered_signatures = [
                (signature, signature_counter)
                for signature, signature_counter
                in counters_for_an_os["signatures"].items()
                if signature_counter.count >= MIN_CRASHES
            ]
            for a_signature, a_signature_counter in filtered_signatures:
                a_signature_summary = SocorroDotDict()
                pv_summary.os_counters[os_name].signatures[a_signature] = (
                    a_signature_summary
                )
                a_signature_summary.count = a_signature_counter.count
                a_signature_summary.modules = {}
                modules_list = [
                    SocorroDotDict({
                        "libname": module_name,
                        "in_sig_count": a_module_counter.count,
                        "in_sig_ratio":
                            float(a_module_counter.count) /
                            a_signature_counter.count,
                        "in_sig_versions": a_module_counter.versions,
                        "in_os_count":
                            counters_for_an_os.modules[module_name].count,
                        "in_os_ratio":
                            float(
                                counters_for_an_os.modules[module_name].count
                            ) / counters_for_an_os.count,
                        "in_os_versions":
                            counters_for_an_os.modules[module_name].versions,
                    })
                    for module_name, a_module_counter
                    in a_signature_counter.modules.iteritems()
                ]
                modules_list = [
                    module for module in modules_list
                    if module.in_sig_ratio - module.in_os_ratio >=
                        self.config.min_baseline_diff
                ]
                modules_list.sort(
                    key=lambda module:
                        module.in_sig_ratio - module.in_os_ratio,
                    reverse=True
                )
                for module in modules_list:
                    module_name = module.libname
                    if options.addons:
                        info = addonids.info_for_id(module_name)
                        if info is not None:
                            module_name = (
                                module_name +
                                u" ({0}, {1})".format(info.name, info.url)
                            )
                    if (
                        options.show_versions and
                        len(module["in_os_versions"]) == 1
                    ):
                        onlyver = module.in_os_versions.keys()[0]
                        if os_name.startswith("Mac OS X"):
                            info = macdebugids.info_for_id(
                                module_name,
                                onlyver
                            )
                            if info is not None:
                                onlyver = onlyver + "; " + info
                        if onlyver != "":
                            module_name = module_name + " (" + onlyver + ")"
                    a_module_summary = SocorroDotDict()
                    a_signature_summary.modules[module_name] = (
                        a_module_summary
                    )
                    a_module_summary.in_sig_count = module.in_sig_count
                    a_module_summary.in_sig_ratio = int(
                        round(module["in_sig_ratio"] * 100)
                    )
                    a_module_summary.in_os_ratio = int(
                        round(module.in_os_ratio * 100)
                    )
                    a_module_summary.in_os_count = module.in_os_count
                    a_module_summary.osys_count = counters_for_an_os.count
                    if (
                        options.show_versions and
                        len(module.in_os_versions) != 1
                    ):
                        versions = module.in_os_versions.keys()
                        versions.sort()
                        a_module_summary.versions = {}
                        for version in versions:
                            sig_ver_count = module.in_sig_versions.get(
                                version,
                                0
                            )
                            os_ver_count = module.in_os_versions[version]
                            if os_name.startswith("Mac OS X"):
                                info = macdebugids.info_for_id(
                                    module_name,
                                    version
                                )
                                if info is not None:
                                    version = version + " (" + info + ")"
                            a_version_summary = SocorroDotDict()
                            a_module_summary.versions[version] = (
                                a_version_summary
                            )
                            a_version_summary.sig_ver_ratio = int(round(
                                float(sig_ver_count) /
                                a_signature_counter.count * 100
                            ))
                            a_version_summary.sig_ver_count = sig_ver_count
                            a_version_summary.sig_count = (
                                a_signature_counter.count
                            )
                            a_version_summary.os_ver_ratio = int(round(
                                float(os_ver_count) /
                                counters_for_an_os.count * 100
                            ))
                            a_version_summary.os_ver_count = os_ver_count
                            a_version_summary.osys_count = (
                                counters_for_an_os.count
                            )
                            a_version_summary.version = version
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - refactored code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return pv_summary

    #--------------------------------------------------------------------------
    def generate_modules_or_addons(self, crash):
        options = self.config
        if options.addons:
            for addon in crash["addons"]:
                yield addon[0], addon[1]
        else:
            if "json_dump" in crash and "modules" in crash["json_dump"]:
                for module in crash["json_dump"]["modules"]:
                    libname = module["filename"]
                    version = module["version"]
                    pdb = module["debug_file"]  # never used?
                    checksum = module["debug_id"]
                    addrstart = module["base_addr"]  # never used?
                    addrend = module["end_addr"]  # never used?
                    if crash["os_name"].startswith("Win"):
                        # We only have good version data on Windows.
                        yield libname, version
                    else:
                        yield libname, checksum

    #--------------------------------------------------------------------------
    def summarize(self):
        # for each product/version pair in the accumulators
        summary = {}
        for pv, an_accumulator in (
            self.counters_for_all_producs_and_versions.iteritems()
        ):
            summary['_'.join(pv)] = self._summary_for_a_product_version_pair(
                an_accumulator
            )
        return summary
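# Illustrative sketch (editor's addition): a concrete instance of the counter
# structure that _action() builds, with hypothetical values, to make the
# schematic in the class docstring concrete.  SocorroDotDict and defaultdict
# are the same names used above; DotDict levels have fixed keys, plain dict
# levels are keyed by values arriving with crashes.
example_pv_counters = {
    "Windows NT": SocorroDotDict({
        "count": 120,  # all crashes seen for this OS
        "signatures": {
            "js::jit::EnterBaseline": SocorroDotDict({
                "count": 30,  # crashes with this signature on this OS
                "modules": {
                    "hook32.dll": SocorroDotDict({
                        "count": 25,
                        "versions": defaultdict(int, {"1.2.3.4": 25}),
                    }),
                },
            }),
        },
        "modules": {
            "hook32.dll": SocorroDotDict({
                "count": 40,  # crashes on this OS that loaded the module
                "versions": defaultdict(int, {"1.2.3.4": 40}),
            }),
        },
    }),
}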
class CorrelationCoreCountRule(CorrelationRule):
    """this class attempts to be a faithful reproduction of the function of
    the original dbaron "per-crash-core-count.py" application, embodied as a
    Socorro TransformRule.

    Individual crashes will be offered to this rule by a Fetch Transform Save
    app through the "_action" method.  This class will examine the crash and
    build counters on an instance of a ProductVersionMapping.  The counter
    structure it builds looks like this:

        a_product_version_mapping[product_version*]
            .osyses[operating_system_name*]
                .count
                .signatures[a_signature*]
                    .count
                    .core_counts[number_of_cores*]
                .core_counts[number_of_cores*]
    """
    required_config = Namespace()
    required_config.namespace('output')
    required_config.output.output_class = change_default(
        CorrelationRule,
        'output.output_class',
        'socorro.analysis.correlations.core_count_rule'
        '.FileOutputForCoreCounts',
        new_reference_value='global.correlations.core'
    )

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    def __init__(self, config=None, quit_check_callback=None):
        super(CorrelationCoreCountRule, self).__init__(
            config,
            quit_check_callback
        )
        for an_accumulator in (
            self.counters_for_all_producs_and_versions.values()
        ):
            an_accumulator.osyses = {}
        self.date_suffix = defaultdict(int)

    #--------------------------------------------------------------------------
    def summary_name(self):
        return 'core-counts'

    #--------------------------------------------------------------------------
    def _action(self, raw, dumps, crash, processor_meta):
        self.date_suffix[crash['crash_id'][-6:]] += 1
        if "os_name" not in crash:
            # We have some bad crash reports.
            return False

        # give the names of the old algorithm's critical variables to their
        # variables in the new system.
        # what does "osyses" mean?  it is the original variable name from
        # the dbaron correlation scripts for a mapping of each os name to
        # the counters for the signatures & crashes for that os.
        try:
            osyses = self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].osyses
            self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].counter += 1
        except (AttributeError, KeyError):
            osyses = {}
            self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].osyses = osyses
            self.counters_for_all_producs_and_versions[(
                crash["product"],
                crash["version"]
            )].counter = 1
        options = self.config

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - original unaltered code section
        # to avoid introducing errors, this code was not refactored to use
        # more comprehensible variable names or to adopt current style
        # guides.
        # glossary of names:
        #    osyses - a mapping keyed by the name of an OS
        #    osys - the counter structure for an individual OS
        #    signame - a signature
        #    signature - the counter structure for a signature
        #    accumulate_objs - a list of counter structures
        #    obj - a counter as a loop variable
        #    crash - a socorro processed crash
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        osname = crash["os_name"]
        # The os_version field is way too specific on Linux, and we don't
        # have much Linux data anyway.
        if options.by_os_version and osname != "Linux":
            osname = osname + " " + crash["os_version"]
        osys = osyses.setdefault(
            osname,
            {"count": 0, "signatures": {}, "core_counts": {}}
        )
        signame = crash["signature"]
        if re.search(r"\S+@0x[0-9a-fA-F]+$", signame) is not None:
            if options.condense:
                # Condense all signatures in a given DLL.
                signame = re.sub(r"@0x[0-9a-fA-F]+$", "", signame)
        if "reason" in crash and crash["reason"] is not None:
            signame = signame + "|" + crash["reason"]
        signature = osys["signatures"].setdefault(
            signame,
            {"count": 0, "core_counts": {}}
        )
        accumulate_objs = [osys, signature]

        for obj in accumulate_objs:
            obj["count"] = obj["count"] + 1

        if "json_dump" in crash and "system_info" in crash["json_dump"]:
            family = crash["json_dump"]["system_info"]["cpu_arch"]
            details = crash["json_dump"]["system_info"]["cpu_info"]  # unused?
            cores = crash["json_dump"]["system_info"]["cpu_count"]
            infostr = family + " with " + str(cores) + " cores"
            # Increment the global count on osys and the per-signature count.
            for obj in accumulate_objs:
                obj["core_counts"][infostr] = \
                    obj["core_counts"].get(infostr, 0) + 1
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - original unaltered code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return True

    #--------------------------------------------------------------------------
    def _summary_for_a_product_version_pair(self, an_accumulator):
        """in the original code, the counter structures were walked and
        manipulated to form the statistics.  Once a stat was determined, it
        was printed to stdout.  Since we want various means of outputting
        the data, instead of printing to stdout this method saves the
        statistics in a "summary_structure".  That structure is later walked
        for printing or output to some future storage scheme.

        The summary structure looks like this:
            summary[product_version*]
                .notes - a list of comments by the algorithm
                [os_name]
                    .count
                    .signatures[signame*]
                        .name
                        .count
                        .cores[number_of_cores]
                            .in_sig_count
                            .in_sig_ratio
                            .rounded_in_sig_ratio
                            .in_os_count
                            .in_os_ratio
                            .rounded_in_os_ratio
        """
        pv_summary = {
            'notes': [],
        }
        if len(self.date_suffix) > 1:
            message = (
                "crashes from more than one day %s"
                % str(tuple(self.date_suffix.keys()))
            )
            pv_summary['notes'].append(message)
        pv_summary['date_key'] = self.date_suffix.keys()[0]

        MIN_CRASHES = self.config.min_crashes
        osyses = an_accumulator.osyses
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # begin - minimally altered section from the original code
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        infostr_re = re.compile(r"^(.*) with (\d+) cores$")

        #----------------------------------------------------------------------
        def cmp_infostr(x, y):
            (familyx, coresx) = infostr_re.match(x).groups()
            (familyy, coresy) = infostr_re.match(y).groups()
            if familyx != familyy:
                return cmp(familyx, familyy)
            return cmp(int(coresx), int(coresy))
        #----------------------------------------------------------------------

        sorted_osyses = osyses.keys()
        sorted_osyses.sort()

        for osname in sorted_osyses:
            osys = osyses[osname]

            pv_summary[osname] = SocorroDotDict()
            pv_summary[osname].count = osys['count']
            pv_summary[osname].signatures = {}

            sorted_signatures = [
                sig for sig in osys["signatures"].items()
                if sig[1]["count"] >= MIN_CRASHES
            ]
            sorted_signatures.sort(
                key=lambda tuple: tuple[1]["count"],
                reverse=True
            )
            sorted_cores = osys["core_counts"].keys()
            # strongly suspect that sorting is useless here
            sorted_cores.sort(cmp=cmp_infostr)
            for signame, sig in sorted_signatures:
                pv_summary[osname].signatures[signame] = SocorroDotDict({
                    'name': signame,
                    'count': sig['count'],
                    'cores': {},
                })
                by_number_of_cores = (
                    pv_summary[osname].signatures[signame].cores
                )
                for cores in sorted_cores:
                    by_number_of_cores[cores] = SocorroDotDict()
                    in_sig_count = sig["core_counts"].get(cores, 0)
                    in_sig_ratio = float(in_sig_count) / sig["count"]
                    in_os_count = osys["core_counts"][cores]
                    in_os_ratio = float(in_os_count) / osys["count"]
                    rounded_in_sig_ratio = int(round(in_sig_ratio * 100))
                    rounded_in_os_ratio = int(round(in_os_ratio * 100))
                    by_number_of_cores[cores].in_sig_count = in_sig_count
                    by_number_of_cores[cores].in_sig_ratio = in_sig_ratio
                    by_number_of_cores[cores].rounded_in_sig_ratio = \
                        rounded_in_sig_ratio
                    by_number_of_cores[cores].in_os_count = in_os_count
                    by_number_of_cores[cores].in_os_ratio = in_os_ratio
                    by_number_of_cores[cores].rounded_in_os_ratio = \
                        rounded_in_os_ratio
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # end - minimally altered code section
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return pv_summary

    #--------------------------------------------------------------------------
    def summarize(self):
        # for each product/version pair in the accumulators
        summary = {}
        for pv, counters_for_pv in (
            self.counters_for_all_producs_and_versions.iteritems()
        ):
            summary['_'.join(pv)] = self._summary_for_a_product_version_pair(
                counters_for_pv
            )
        return summary
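# Editor's sketch (illustrative): what cmp_infostr orders.  The core-count
# keys are strings such as "x86 with 4 cores", so a plain string sort would
# put "16 cores" before "4 cores"; the comparator parses the CPU family and
# the numeric core count out of each key and compares the count as an int.
# Hypothetical input:
#
#     keys = ["x86 with 16 cores", "x86 with 4 cores", "amd64 with 2 cores"]
#     keys.sort(cmp=cmp_infostr)
#     # -> ["amd64 with 2 cores", "x86 with 4 cores", "x86 with 16 cores"]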
class BotoS3CrashStorage(BotoCrashStorage):
    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
class TelemetryBotoS3CrashStorage(BotoS3CrashStorage):
    """S3 crash storage class that sends a subset of the processed crash,
    reduced to only the fields present in the crash report JSON Schema."""

    required_config = Namespace()
    required_config.resource_class = change_default(
        BotoCrashStorage,
        'resource_class',
        'socorro.external.boto.connection_context.RegionalS3ConnectionContext'
    )
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config, *args, **kwargs):
        # This class requires that we use SimpleDatePrefixKeyBuilder, so we
        # stomp on the configuration to make absolutely sure it gets set
        # that way.
        config.keybuilder_class = SimpleDatePrefixKeyBuilder
        super(TelemetryBotoS3CrashStorage, self).__init__(
            config, *args, **kwargs
        )

    def _get_all_fields(self):
        if (
            hasattr(self, '_all_fields') and
            hasattr(self, '_all_fields_timestamp')
        ):
            # we might have it cached
            age = time.time() - self._all_fields_timestamp
            if age < 60 * 60:
                # fresh enough
                return self._all_fields

        self._all_fields = SuperSearchFields(config=self.config).get()
        self._all_fields_timestamp = time.time()
        return self._all_fields

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        all_fields = self._get_all_fields()
        crash_report = {}

        # TODO Opportunity for optimization:
        # We could inspect CRASH_REPORT_JSON_SCHEMA and get a list of all
        # (recursive) keys that are in there and use that to limit the two
        # following loops to not bother filling up `crash_report` with keys
        # that will never be needed.

        # Rename fields in raw_crash.
        raw_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'raw_crash'
        )
        for key, val in raw_crash.items():
            crash_report[raw_fields_map.get(key, key)] = val

        # Rename fields in processed_crash.
        processed_fields_map = dict(
            (x['in_database_name'], x['name'])
            for x in all_fields.values()
            if x['namespace'] == 'processed_crash'
        )
        for key, val in processed_crash.items():
            crash_report[processed_fields_map.get(key, key)] = val

        # Validate crash_report against the schema.
        crash_report = json_schema_reducer.make_reduced_dict(
            CRASH_REPORT_JSON_SCHEMA, crash_report
        )

        self.save_processed(crash_report)

    @staticmethod
    def _do_save_processed(boto_connection, processed_crash):
        """Overriding this method so we can control the "name of thing"
        prefix used to upload to S3."""
        crash_id = processed_crash['uuid']
        processed_crash_as_string = (
            boto_connection._convert_mapping_to_string(processed_crash)
        )
        boto_connection.submit(
            crash_id,
            "crash_report",
            processed_crash_as_string
        )
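# Illustrative sketch (editor's addition): how the in_database_name -> name
# renaming in save_raw_and_processed() above behaves.  The field definition
# below is hypothetical but shaped like a SuperSearchFields entry.
example_all_fields = {
    'build_id': {
        'name': 'build_id',
        'in_database_name': 'build',
        'namespace': 'raw_crash',
    },
}
example_raw_fields_map = dict(
    (x['in_database_name'], x['name'])
    for x in example_all_fields.values()
    if x['namespace'] == 'raw_crash'
)
example_raw_crash = {'build': '20160101030203', 'other_key': 'kept as-is'}
example_crash_report = {}
for key, val in example_raw_crash.items():
    # keys with a mapping are renamed; everything else passes through
    example_crash_report[example_raw_fields_map.get(key, key)] = val
assert example_crash_report == {
    'build_id': '20160101030203',
    'other_key': 'kept as-is',
}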
class BreakpadStackwalkerRule2015(ExternalProcessRule):
    required_config = Namespace()
    required_config.add_option(
        name='public_symbols_url',
        doc='url of the public symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.add_option(
        name='private_symbols_url',
        doc='url of the private symbol server',
        default="https://localhost",
        likely_to_be_changed=True
    )
    required_config.command_line = change_default(
        ExternalProcessRule,
        'command_line',
        'timeout -s KILL 30 {command_pathname} '
        '--raw-json {raw_crash_pathname} '
        '--symbols-url {public_symbols_url} '
        '--symbols-url {private_symbols_url} '
        '--symbols-cache {symbol_cache_path} '
        '{dump_file_pathname} '
        '2>/dev/null'
    )
    required_config.command_pathname = change_default(
        ExternalProcessRule,
        'command_pathname',
        '/data/socorro/stackwalk/bin/stackwalker',
    )
    required_config.add_option(
        'symbol_cache_path',
        doc='the path where the symbol cache is found; this location must '
            'be readable and writeable (quote path with embedded spaces)',
        default=os.path.join(tempfile.gettempdir(), 'symbols'),
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a path where temporary files may be written',
        default=tempfile.gettempdir(),
    )

    #--------------------------------------------------------------------------
    def version(self):
        return '1.0'

    #--------------------------------------------------------------------------
    @contextmanager
    def _temp_raw_crash_json_file(self, raw_crash, crash_id):
        file_pathname = os.path.join(
            self.config.temporary_file_system_storage_path,
            "%s.%s.TEMPORARY.json" % (
                crash_id,
                threading.currentThread().getName()
            )
        )
        with open(file_pathname, "w") as f:
            ujson.dump(raw_crash, f)
        try:
            yield file_pathname
        finally:
            os.unlink(file_pathname)

    #--------------------------------------------------------------------------
    def _execute_external_process(self, command_line, processor_meta):
        stackwalker_output, return_code = super(
            BreakpadStackwalkerRule2015,
            self
        )._execute_external_process(command_line, processor_meta)

        if not isinstance(stackwalker_output, Mapping):
            processor_meta.processor_notes.append(
                "MDSW produced unexpected output: %s..."
                % str(stackwalker_output)[:10]
            )
            stackwalker_output = {}

        stackwalker_data = DotDict()
        stackwalker_data.json_dump = stackwalker_output
        stackwalker_data.mdsw_return_code = return_code
        stackwalker_data.mdsw_status_string = stackwalker_output.get(
            'status',
            'unknown error'
        )
        stackwalker_data.success = stackwalker_data.mdsw_status_string == 'OK'

        if return_code == 124:
            processor_meta.processor_notes.append(
                "MDSW terminated with SIGKILL due to timeout"
            )
        elif return_code != 0 or not stackwalker_data.success:
            processor_meta.processor_notes.append(
                "MDSW failed on '%s': %s" % (
                    command_line,
                    stackwalker_data.mdsw_status_string
                )
            )

        return stackwalker_data, return_code

    #--------------------------------------------------------------------------
    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        if 'additional_minidumps' not in processed_crash:
            processed_crash.additional_minidumps = []
        with self._temp_raw_crash_json_file(
            raw_crash,
            raw_crash.uuid
        ) as raw_crash_pathname:
            for dump_name in raw_dumps.iterkeys():
                if processor_meta.quit_check:
                    processor_meta.quit_check()

                # this rule is only interested in dumps targeted for the
                # minidump stackwalker external program.  As of the writing
                # of this code, there is one other dump type.  The only way
                # to differentiate these dump types is by the name of the
                # dump.  All minidumps targeted for the stackwalker will
                # have a name with a prefix specified in configuration:
                if not dump_name.startswith(self.config.dump_field):
                    # dumps not intended for the stackwalker are ignored
                    continue

                dump_pathname = raw_dumps[dump_name]

                if self.config.chatty:
                    self.config.logger.debug(
                        "BreakpadStackwalkerRule: %s, %s",
                        dump_name,
                        dump_pathname
                    )

                command_line = self.config.command_line.format(
                    **dict(
                        self.config,
                        dump_file_pathname=dump_pathname,
                        raw_crash_pathname=raw_crash_pathname
                    )
                )

                stackwalker_data, return_code = (
                    self._execute_external_process(
                        command_line,
                        processor_meta
                    )
                )

                if dump_name == self.config.dump_field:
                    processed_crash.update(stackwalker_data)
                else:
                    processed_crash.additional_minidumps.append(dump_name)
                    processed_crash[dump_name] = stackwalker_data
        return True
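# Illustrative sketch (editor's addition): how command_line is built in
# _action() above.  str.format() is fed the whole config mapping plus the
# two per-dump path names, so the default template expands to something like
# the following (all values hypothetical):
example_command_line = (
    'timeout -s KILL 30 {command_pathname} '
    '--raw-json {raw_crash_pathname} '
    '--symbols-url {public_symbols_url} '
    '--symbols-url {private_symbols_url} '
    '--symbols-cache {symbol_cache_path} '
    '{dump_file_pathname} '
    '2>/dev/null'
).format(
    command_pathname='/data/socorro/stackwalk/bin/stackwalker',
    raw_crash_pathname='/tmp/xxx.MainThread.TEMPORARY.json',
    public_symbols_url='https://localhost',
    private_symbols_url='https://localhost',
    symbol_cache_path='/tmp/symbols',
    dump_file_pathname='/tmp/xxx.upload_file_minidump',
)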