def fix_data_in_s3(fields, bucket, s3_client, crash_data):
    """Strip the given fields from a raw crash stored in S3.

    The raw crash object is fetched, parsed as JSON, and every name in
    ``fields`` that is present as a top-level key is deleted. The object is
    uploaded back to the same key only if at least one key was removed.

    :arg fields: iterable of top-level keys to delete from the raw crash
    :arg bucket: name of the S3 bucket holding the raw crash
    :arg s3_client: client providing ``get_object`` and ``upload_fileobj``
    :arg crash_data: mapping with a ``"crashid"`` entry

    """
    crashid = crash_data["crashid"]
    key_parts = {
        "entropy": crashid[:3],
        "date": date_from_ooid(crashid).strftime("%Y%m%d"),
        "crashid": crashid,
    }
    path = "v2/raw_crash/%(entropy)s/%(date)s/%(crashid)s" % key_parts

    body = s3_client.get_object(Bucket=bucket, Key=path)["Body"].read()
    data = json.loads(body)

    modified = False
    for name in fields:
        if name not in data:
            continue
        del data[name]
        modified = True

    if not modified:
        click.echo("# s3: raw crash was fine")
        return

    # Something was removed, so write the trimmed document back in place.
    payload = io.BytesIO(dict_to_str(data).encode("utf-8"))
    s3_client.upload_fileobj(Fileobj=payload, Bucket=bucket, Key=path)
    click.echo("# s3: fixed raw crash")
def check_elasticsearch(supersearch, crash_ids):
    """Checks Elasticsearch and returns the set of missing crash ids.

    Crash ids should all be on the same day.

    :arg supersearch: object whose ``get(**params)`` returns a mapping with a
        ``"hits"`` list of mappings that each have a ``"uuid"`` key
    :arg crash_ids: a single crash id string or a sequence of crash id strings

    :returns: set of the given crash ids that were not in the search hits;
        an empty set when no crash ids were given

    """
    crash_ids = [crash_ids] if isinstance(crash_ids, str) else crash_ids
    if not crash_ids:
        # Nothing to look up, so nothing can be missing. This also avoids an
        # IndexError from crash_ids[0] below.
        return set()

    crash_date = date_from_ooid(crash_ids[0])

    # The datestamp in the crashid doesn't match the processed date sometimes especially
    # when the crash came in at the end of the day.
    window = datetime.timedelta(days=5)
    start_date = (crash_date - window).strftime("%Y-%m-%d")
    end_date = (crash_date + window).strftime("%Y-%m-%d")

    params = {
        "uuid": crash_ids,
        "date": [">=%s" % start_date, "<=%s" % end_date],
        "_columns": ["uuid"],
        "_facets": [],
        "_facets_size": 0,
    }
    search_results = supersearch.get(**params)

    crash_ids_in_es = {hit["uuid"] for hit in search_results["hits"]}
    return set(crash_ids) - crash_ids_in_es
def _get_datestamp(self, crashid):
    """Return the datestamp parsed out of ``crashid``.

    :raises CrashidMissingDatestamp: if ``date_from_ooid`` yields ``None``
        for the crash id

    """
    parsed = date_from_ooid(crashid)
    if parsed is not None:
        return parsed

    # We should never hit this situation unless the crashid is not valid
    raise CrashidMissingDatestamp('%s is missing datestamp' % crashid)
def _get_datestamp(self, crashid):
    """Look up the datestamp embedded in ``crashid``, failing loudly if absent.

    :raises CrashidMissingDatestamp: when ``date_from_ooid`` returns ``None``

    """
    result = date_from_ooid(crashid)
    if result is not None:
        return result

    # We should never hit this situation unless the crashid is not valid
    raise CrashidMissingDatestamp('%s is missing datestamp' % crashid)
def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
    """Derive the date and time fields of the processed crash.

    Sets ``submitted_timestamp``, ``date_processed``, ``crash_time``,
    ``client_crash_date``, ``install_age``, ``uptime`` and ``last_crash`` on
    ``processed_crash`` from values read out of ``raw_crash``. Whenever a raw
    value cannot be converted with ``int()``, a fallback value is used and a
    note is appended to ``processor_meta.processor_notes``.

    :arg raw_crash: source of the raw values; read with ``.get()`` and with
        attribute access
    :arg raw_dumps: not used by this method
    :arg processed_crash: object the derived values are assigned onto
    :arg processor_meta: object whose ``processor_notes`` list receives notes

    """
    processor_notes = processor_meta.processor_notes

    # Fall back to the date derived from the crash id when the raw crash has
    # no submitted_timestamp.
    processed_crash.submitted_timestamp = raw_crash.get(
        "submitted_timestamp", date_from_ooid(raw_crash.uuid))
    if isinstance(processed_crash.submitted_timestamp, str):
        # presumably an ISO-formatted date string -- verify against
        # datetime_from_isodate_string
        processed_crash.submitted_timestamp = datetime_from_isodate_string(
            processed_crash.submitted_timestamp)
    processed_crash.date_processed = processed_crash.submitted_timestamp

    # defaultCrashTime: must have crashed before date processed
    # NOTE(review): time.mktime() interprets the time tuple as local time --
    # confirm that is the intended behavior here.
    submitted_timestamp_as_epoch = int(
        time.mktime(processed_crash.submitted_timestamp.timetuple()))
    try:
        # the old name for crash time
        timestampTime = int(
            raw_crash.get("timestamp", submitted_timestamp_as_epoch))
    except ValueError:
        timestampTime = 0
        processor_notes.append('non-integer value of "timestamp"')
    try:
        # timestampTime is handed to the helper as the fallback argument;
        # presumably the helper truncates the value to 10 characters and may
        # add its own note -- verify against _get_truncate_or_warn
        crash_time = int(
            self._get_truncate_or_warn(raw_crash, "CrashTime", processor_notes, timestampTime, 10))
    except ValueError:
        crash_time = 0
        processor_notes.append('non-integer value of "CrashTime" (%s)' % raw_crash.CrashTime)

    processed_crash.crash_time = crash_time
    # A crash time equal to the submission epoch is reported as unknown.
    if crash_time == submitted_timestamp_as_epoch:
        processor_notes.append("client_crash_date is unknown")

    # StartupTime: must have started up some time before crash
    try:
        startupTime = int(raw_crash.get("StartupTime", crash_time))
    except ValueError:
        startupTime = 0
        processor_notes.append('non-integer value of "StartupTime"')

    # InstallTime: must have installed some time before startup
    try:
        installTime = int(raw_crash.get("InstallTime", startupTime))
    except ValueError:
        installTime = 0
        processor_notes.append('non-integer value of "InstallTime"')

    processed_crash.client_crash_date = datetime.datetime.fromtimestamp(
        crash_time, UTC)
    processed_crash.install_age = crash_time - installTime
    # uptime is clamped so it is never negative
    processed_crash.uptime = max(0, crash_time - startupTime)

    try:
        last_crash = int(raw_crash.SecondsSinceLastCrash)
    except (KeyError, TypeError, ValueError):
        # The same note is recorded for all three exception types.
        last_crash = None
        processor_notes.append(
            'non-integer value of "SecondsSinceLastCrash"')
    if last_crash and last_crash > MAXINT:
        last_crash = None
        processor_notes.append(
            '"SecondsSinceLastCrash" larger than MAXINT - set to NULL')
    processed_crash.last_crash = last_crash
def get_datestamp(crashid):
    """Extract the datestamp encoded in a crash id.

    :arg crashid: the crash id to parse

    :returns: datetime

    :raises CrashIDMissingDatestamp: if ``date_from_ooid`` finds no datestamp
        in the crash id

    """
    parsed = date_from_ooid(crashid)
    if parsed is not None:
        return parsed

    # We should never hit this situation unless the crashid is not valid
    raise CrashIDMissingDatestamp("%s is missing datestamp" % crashid)
def create_processed_crash_in_es(self, es_conn, crash_id):
    """Index a minimal crash document for ``crash_id`` and refresh ``es_conn``.

    The document holds an empty raw crash and a processed crash with a fixed
    signature; its ``date_processed`` is the date derived from the crash id,
    and that same date is used to format the index name from the connection's
    index template.

    :arg es_conn: connection object providing ``get_index_template``,
        ``get_doctype``, ``refresh`` and usable as a context-manager factory
    :arg crash_id: crash id used for the document and as the document id

    """
    processed_date = date_from_ooid(crash_id)
    processed_crash = {
        "uuid": crash_id,
        "signature": "OOM | Small",
        "date_processed": processed_date,
    }
    payload = {
        "crash_id": crash_id,
        "raw_crash": {},
        "processed_crash": processed_crash,
    }

    target_index = processed_date.strftime(es_conn.get_index_template())
    doc_type = es_conn.get_doctype()

    with es_conn() as conn:
        conn.index(
            index=target_index,
            doc_type=doc_type,
            body=payload,
            id=crash_id,
        )
    es_conn.refresh()
def check_elasticsearch(supersearch, crash_ids):
    """Checks Elasticsearch and returns the set of missing crash ids.

    Crash ids should all be on the same day.

    :arg supersearch: object whose ``get(**params)`` returns a mapping with a
        ``"hits"`` list of mappings that each have a ``"uuid"`` key
    :arg crash_ids: a single crash id string or a sequence of crash id strings

    :returns: set of the given crash ids that were not in the search hits;
        an empty set when no crash ids were given

    """
    crash_ids = [crash_ids] if isinstance(crash_ids, str) else crash_ids
    if not crash_ids:
        # Nothing to look up, so nothing can be missing. This also avoids an
        # IndexError from crash_ids[0] below.
        return set()

    # The search window runs from the date in the first crash id to one day
    # after it.
    crash_date = date_from_ooid(crash_ids[0])
    start_date = crash_date.strftime("%Y-%m-%d")
    end_date = (crash_date + datetime.timedelta(days=1)).strftime("%Y-%m-%d")

    params = {
        "uuid": crash_ids,
        "date": [">=%s" % start_date, "<=%s" % end_date],
        # Ask for as many results as there are crash ids being checked.
        "_results_number": len(crash_ids),
        "_columns": ["uuid"],
        "_facets": [],
        "_facets_size": 0,
    }
    search_results = supersearch.get(**params)

    crash_ids_in_es = {hit["uuid"] for hit in search_results["hits"]}
    return set(crash_ids) - crash_ids_in_es
def _get_base(self, crash_id):
    """Return ``[fs_root, datestamp]`` for ``crash_id``.

    The datestamp is the year, month and day of the date derived from the
    crash id; if no date can be derived, ``utc_now()`` is used instead.

    """
    when = date_from_ooid(crash_id) or utc_now()
    day_stamp = "%4d%02d%02d" % (when.year, when.month, when.day)
    return [self.config.fs_root, day_stamp]
def test_date_from_ooid():
    """date_from_ooid returns the encoded date, or None for a bad suffix."""
    # Crash id ending in 181210 -> 2018-12-10
    dated_id = "3efa014e-a9e9-405d-ae7e-9def54181210"
    parsed = ooid.date_from_ooid(dated_id)
    assert parsed == datetime.datetime(2018, 12, 10, tzinfo=UTC)

    # Crash id ending in ffffff carries no parseable date
    undated_id = "3efa014e-a9e9-405d-ae7e-9def54ffffff"
    parsed = ooid.date_from_ooid(undated_id)
    assert parsed is None
def test_date_from_ooid():
    """Check date_from_ooid on one dated crash id and one without a date."""
    # Suffix 181210 encodes 2018-12-10
    with_date = '3efa014e-a9e9-405d-ae7e-9def54181210'
    result = ooid.date_from_ooid(with_date)
    assert result == datetime.datetime(2018, 12, 10, tzinfo=UTC)

    # Suffix ffffff is not a date, so None is expected
    without_date = '3efa014e-a9e9-405d-ae7e-9def54ffffff'
    result = ooid.date_from_ooid(without_date)
    assert result is None
def _get_base(self, crash_id):
    """Build the base path components for ``crash_id``.

    Returns a two-item list: the configured ``fs_root`` and the year, month
    and day of the crash id's date. ``utc_now()`` supplies the date when
    none can be derived from the crash id.

    """
    stamp_source = date_from_ooid(crash_id) or utc_now()
    ymd = (stamp_source.year, stamp_source.month, stamp_source.day)
    formatted = "%4d%02d%02d" % ymd
    return [self.config.fs_root, formatted]
def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
    """Derive the date and time fields of the processed crash.

    Sets ``submitted_timestamp``, ``date_processed``, ``crash_time``,
    ``client_crash_date``, ``install_age``, ``uptime`` and ``last_crash`` on
    ``processed_crash`` from values read out of ``raw_crash``. Whenever a raw
    value cannot be converted with ``int()``, a fallback value is used and a
    note is appended to ``processor_meta.processor_notes``.

    :arg raw_crash: source of the raw values; read with ``.get()`` and with
        attribute access
    :arg raw_dumps: not used by this method
    :arg processed_crash: object the derived values are assigned onto
    :arg processor_meta: object whose ``processor_notes`` list receives notes

    """
    processor_notes = processor_meta.processor_notes

    # Fall back to the date derived from the crash id when the raw crash has
    # no submitted_timestamp.
    processed_crash.submitted_timestamp = raw_crash.get(
        'submitted_timestamp', date_from_ooid(raw_crash.uuid)
    )
    if isinstance(processed_crash.submitted_timestamp, str):
        # presumably an ISO-formatted date string -- verify against
        # datetime_from_isodate_string
        processed_crash.submitted_timestamp = datetime_from_isodate_string(
            processed_crash.submitted_timestamp
        )
    processed_crash.date_processed = processed_crash.submitted_timestamp

    # defaultCrashTime: must have crashed before date processed
    # NOTE(review): time.mktime() interprets the time tuple as local time --
    # confirm that is the intended behavior here.
    submitted_timestamp_as_epoch = int(
        time.mktime(processed_crash.submitted_timestamp.timetuple())
    )
    try:
        timestampTime = int(
            raw_crash.get('timestamp', submitted_timestamp_as_epoch)
        )  # the old name for crash time
    except ValueError:
        timestampTime = 0
        processor_notes.append('non-integer value of "timestamp"')
    try:
        # timestampTime is handed to the helper as the fallback argument;
        # presumably the helper truncates the value to 10 characters and may
        # add its own note -- verify against _get_truncate_or_warn
        crash_time = int(
            self._get_truncate_or_warn(
                raw_crash, 'CrashTime', processor_notes, timestampTime, 10
            )
        )
    except ValueError:
        crash_time = 0
        processor_notes.append(
            'non-integer value of "CrashTime" (%s)' % raw_crash.CrashTime
        )

    processed_crash.crash_time = crash_time
    # A crash time equal to the submission epoch is reported as unknown.
    if crash_time == submitted_timestamp_as_epoch:
        processor_notes.append("client_crash_date is unknown")

    # StartupTime: must have started up some time before crash
    try:
        startupTime = int(raw_crash.get('StartupTime', crash_time))
    except ValueError:
        startupTime = 0
        processor_notes.append('non-integer value of "StartupTime"')

    # InstallTime: must have installed some time before startup
    try:
        installTime = int(raw_crash.get('InstallTime', startupTime))
    except ValueError:
        installTime = 0
        processor_notes.append('non-integer value of "InstallTime"')

    processed_crash.client_crash_date = datetime.datetime.fromtimestamp(
        crash_time, UTC
    )
    processed_crash.install_age = crash_time - installTime
    # uptime is clamped so it is never negative
    processed_crash.uptime = max(0, crash_time - startupTime)

    try:
        last_crash = int(raw_crash.SecondsSinceLastCrash)
    except (KeyError, TypeError, ValueError):
        # The same note is recorded for all three exception types.
        last_crash = None
        processor_notes.append(
            'non-integer value of "SecondsSinceLastCrash"'
        )
    if last_crash and last_crash > MAXINT:
        last_crash = None
        processor_notes.append(
            '"SecondsSinceLastCrash" larger than MAXINT - set to NULL'
        )
    processed_crash.last_crash = last_crash