async def listen_for_messages(self, async_queue, log_message):
    """Listen for messages on the qpc topic.

    Once a message arrives on the topic, add it to the passed-in queue.
    :param async_queue: the asyncio queue to place incoming messages on
    :param log_message: the message to log once the consumer is listening
    :returns None
    """
    try:
        await self.consumer.start()
    except KafkaConnectionError:
        KAFKA_ERRORS.inc()
        print_error_loop_event()
        raise KafkaMsgHandlerError('Unable to connect to kafka server.')
    except Exception as err:  # pylint: disable=broad-except
        KAFKA_ERRORS.inc()
        LOG.error(format_message(
            self.prefix,
            'The following error occurred: %s' % err))
        print_error_loop_event()

    LOG.info(log_message)
    try:
        # Consume messages
        async for msg in self.consumer:
            await async_queue.put(msg)
    except Exception as err:  # pylint: disable=broad-except
        KAFKA_ERRORS.inc()
        LOG.error(format_message(
            self.prefix,
            'The following error occurred: %s' % err))
        print_error_loop_event()
    finally:
        # Will leave consumer group; perform autocommit if enabled.
        await self.consumer.stop()
def transition_to_validated(self):
    """Revalidate the slice because it is in the failed validation state."""
    self.prefix = 'ATTEMPTING VALIDATION'
    LOG.info(
        format_message(self.prefix,
                       'Validating report contents. State is "%s".' %
                       self.report_or_slice.state,
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))
    try:
        self.report_json = json.loads(self.report_or_slice.report_json)
        self.candidate_hosts = self._validate_report_details()
        # Here we want to update the report state of the actual report slice when finished
        self.next_state = ReportSlice.VALIDATED
        options = {'candidate_hosts': self.candidate_hosts}
        self.update_object_state(options=options)
    except QPCReportException:
        # If a QPCReportException occurs, validation completed but the report
        # is not valid; this slice is invalid and only awaits being archived.
        self.next_state = ReportSlice.FAILED_VALIDATION
        self.update_object_state(options={})
    except Exception as error:  # pylint: disable=broad-except
        # This slice blew up validation - we want to retry it later,
        # which means it enters our odd state of requiring validation.
        LOG.error(
            format_message(
                self.prefix,
                'The following error occurred: %s.' % str(error)))
        self.determine_retry(ReportSlice.FAILED_VALIDATION,
                             ReportSlice.RETRY_VALIDATION,
                             retry_type=ReportSlice.GIT_COMMIT)
async def transition_to_validation_reported(self):
    """Upload the validation status & move to validation reported state."""
    self.prefix = 'ATTEMPTING STATUS UPLOAD'
    LOG.info(
        format_message(self.prefix,
                       'Uploading validation status "%s". State is "%s".' %
                       (self.status, self.state),
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))
    try:
        message_hash = self.upload_message['request_id']
        await self._send_confirmation(message_hash)
        self.next_state = Report.VALIDATION_REPORTED
        options = {'ready_to_archive': True}
        self.update_object_state(options=options)
        LOG.info(
            format_message(self.prefix,
                           'Status successfully uploaded.',
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
        if self.status == FAILURE_CONFIRM_STATUS:
            options = {'retry': RETRY.keep_same, 'ready_to_archive': True}
            self.update_object_state(options=options)
            self.archive_report_and_slices()
    except Exception as error:  # pylint: disable=broad-except
        LOG.error(
            format_message(self.prefix,
                           'The following error occurred: %s.' % str(error),
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
        self.determine_retry(Report.FAILED_VALIDATION_REPORTING,
                             Report.VALIDATED)
async def save_message_and_ack(self, consumer_record):
    """Save and ack the uploaded kafka message."""
    self.prefix = 'SAVING MESSAGE'
    if consumer_record.topic == QPC_TOPIC:
        try:
            missing_fields = []
            self.upload_message = self.unpack_consumer_record(consumer_record)
            # rh_account is being deprecated so we use it as a backup if
            # account is not there
            rh_account = self.upload_message.get('rh_account')
            request_id = self.upload_message.get('request_id')
            url = self.upload_message.get('url')
            self.account_number = self.upload_message.get('account', rh_account)
            if not self.account_number:
                missing_fields.append('account')
            if not request_id:
                missing_fields.append('request_id')
            if not url:
                missing_fields.append('url')
            if missing_fields:
                raise QPCKafkaMsgException(
                    format_message(
                        self.prefix,
                        'Message missing required field(s): %s.' % ', '.join(missing_fields)))
            self.check_if_url_expired(url, request_id)
            try:
                uploaded_report = {
                    'upload_srv_kafka_msg': json.dumps(self.upload_message),
                    'account': self.account_number,
                    'request_id': request_id,
                    'state': Report.NEW,
                    'state_info': json.dumps([Report.NEW]),
                    'last_update_time': datetime.now(pytz.utc),
                    'arrival_time': datetime.now(pytz.utc),
                    'retry_count': 0
                }
                report_serializer = ReportSerializer(data=uploaded_report)
                report_serializer.is_valid(raise_exception=True)
                report_serializer.save()
                MSG_UPLOADS.labels(account_number=self.account_number).inc()
                LOG.info(format_message(
                    self.prefix,
                    'Upload service message saved with request_id: %s. Ready for processing.'
                    % request_id))
                await self.consumer.commit()
            except Exception as error:  # pylint: disable=broad-except
                DB_ERRORS.inc()
                LOG.error(format_message(
                    self.prefix,
                    'The following error occurred while trying to save and '
                    'commit the message: %s' % error))
                print_error_loop_event()
        except QPCKafkaMsgException as message_error:
            LOG.error(format_message(
                self.prefix,
                'Error processing records. Message: %s, Error: %s' %
                (consumer_record, message_error)))
            await self.consumer.commit()
    else:
        LOG.debug(format_message(
            self.prefix,
            'Message not on %s topic: %s' % (QPC_TOPIC, consumer_record)))
def transition_to_downloaded(self):
    """Attempt to download report, extract json, and create slices.

    As long as we have one valid slice, we set the status to success.
    """
    self.prefix = 'ATTEMPTING DOWNLOAD'
    report_download_failed_msg = \
        'The report could not be downloaded due to the following error: %s.'
    LOG.info(
        format_message(
            self.prefix,
            'Attempting to download the report and extract the json. '
            'State is "%s".' % self.report_or_slice.state,
            account_number=self.account_number))
    try:
        report_tar = self._download_report()
        options = self._extract_and_create_slices(report_tar)
        self.next_state = Report.DOWNLOADED
        # update the report or slice with downloaded info
        self.update_object_state(options=options)
        self.deduplicate_reports()
    except (FailDownloadException, FailExtractException) as err:
        LOG.error(
            format_message(self.prefix,
                           report_download_failed_msg % err,
                           account_number=self.account_number))
        self.next_state = Report.FAILED_DOWNLOAD
        options = {'ready_to_archive': True}
        self.update_object_state(options=options)
    except (RetryDownloadException, RetryExtractException) as err:
        LOG.error(
            format_message(self.prefix,
                           report_download_failed_msg % err,
                           account_number=self.account_number))
        self.determine_retry(Report.FAILED_DOWNLOAD, Report.STARTED)
async def transition_to_upload_object_storage(self):
    """Upload slice to object storage."""
    self.prefix = 'ATTEMPTING OBJECT STORAGE UPLOAD'
    LOG.info(
        format_message(self.prefix,
                       'Uploading data to Object Storage. State is "%s".' %
                       self.report_or_slice.state,
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))
    try:
        await self._upload_to_object_storage()
        LOG.info(
            format_message(self.prefix,
                           'All metrics were successfully uploaded.',
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
        self.next_state = ReportSlice.METRICS_UPLOADED
        options = {'ready_to_archive': True}
        self.update_object_state(options=options)
    except Exception as error:  # pylint: disable=broad-except
        OBJECTSTORE_ERRORS.inc()
        LOG.error(
            format_message(self.prefix,
                           'The following error occurred: %s.' % str(error),
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
        self.determine_retry(ReportSlice.FAILED_METRICS_UPLOAD,
                             ReportSlice.VALIDATED,
                             retry_type=ReportSlice.TIME)
def transition_to_validated(self):
    """Validate the report contents & move to the validated state."""
    self.prefix = 'ATTEMPTING VALIDATE'
    LOG.info(
        format_message(self.prefix,
                       'Validating the report contents. State is "%s".' %
                       self.report_or_slice.state,
                       account_number=self.account_number))
    # find all associated report slices
    report_slices = ReportSlice.objects.all().filter(report=self.report_or_slice)
    self.status = FAILURE_CONFIRM_STATUS
    for report_slice in report_slices:
        try:
            self.report_json = json.loads(report_slice.report_json)
            candidate_hosts = self._validate_report_details()
            if candidate_hosts:
                self.status = SUCCESS_CONFIRM_STATUS
            # Here we want to update the report state of the actual report slice
            options = {
                'state': ReportSlice.NEW,
                'candidate_hosts': candidate_hosts
            }
            self.update_slice_state(options=options, report_slice=report_slice)
        except QPCReportException:
            # If a QPCReportException occurs, validation completed but the report
            # is not valid; this slice is invalid and only awaits being archived.
            options = {
                'state': ReportSlice.FAILED_VALIDATION,
                'ready_to_archive': True
            }
            self.update_slice_state(options=options, report_slice=report_slice)
        except Exception as error:  # pylint: disable=broad-except
            # This slice blew up validation - we want to retry it later,
            # which means it enters our odd state of retrying validation.
            LOG.error(
                format_message(
                    self.prefix,
                    'The following error occurred: %s.' % str(error)))
            options = {
                'state': ReportSlice.RETRY_VALIDATION,
                'retry': RETRY.increment
            }
            self.update_slice_state(options=options, report_slice=report_slice)
    if self.status == FAILURE_CONFIRM_STATUS:
        LOG.warning(
            format_message(
                self.prefix,
                'The uploaded report was invalid. Status set to "%s".' % self.status,
                account_number=self.account_number))
    self.next_state = Report.VALIDATED
    options = {'status': self.status}
    self.update_object_state(options=options)
def create_report_slice(self, options):
    """Create report slice.

    :param options: <dict> containing the report_json, report_slice_id,
        source, and source_metadata
    :returns boolean regarding whether or not the slice was created.
    """
    report_json = options.get('report_json')
    report_slice_id = options.get('report_slice_id')
    source = options.get('source')
    source_metadata = options.get('source_metadata')
    LOG.info(
        format_message(self.prefix,
                       'Creating report slice %s' % report_slice_id,
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))
    # first we should see if any slices exist with this slice id & report_platform_id
    # if they exist we will not create the slice
    created = False
    existing_report_slices = ReportSlice.objects.filter(
        report_platform_id=self.report_platform_id).filter(
            report_slice_id=report_slice_id)
    if existing_report_slices.count() > 0:
        LOG.error(
            format_message(
                self.prefix,
                'A report slice with the report_platform_id %s and report_slice_id %s '
                'already exists.' % (self.report_platform_id, report_slice_id),
                account_number=self.account_number,
                report_platform_id=self.report_platform_id))
        return created

    report_slice = {
        'state': ReportSlice.PENDING,
        'account': self.account_number,
        'state_info': json.dumps([ReportSlice.PENDING]),
        'last_update_time': datetime.now(pytz.utc),
        'retry_count': 0,
        'report_json': json.dumps(report_json),
        'report_platform_id': self.report_platform_id,
        'report_slice_id': report_slice_id,
        'report': self.report_or_slice.id,
        'source': source,
        'source_metadata': json.dumps(source_metadata),
        'creation_time': datetime.now(pytz.utc)
    }
    slice_serializer = ReportSliceSerializer(data=report_slice)
    if slice_serializer.is_valid(raise_exception=True):
        slice_serializer.save()
        LOG.info(
            format_message(self.prefix,
                           'Successfully created report slice %s' % report_slice_id,
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
    return True
async def _send_confirmation(self, file_hash):  # pragma: no cover
    """
    Send kafka validation message to Insights Upload service.

    When a new file lands for topic 'qpc' we must validate it
    so that it will be made permanently available to other
    apps listening on the 'available' topic.
    :param file_hash: (str) hash for the file being confirmed.
    :returns None
    """
    self.prefix = 'REPORT VALIDATION STATE ON KAFKA'
    await self.producer.stop()
    self.producer = AIOKafkaProducer(
        loop=REPORT_PROCESSING_LOOP,
        bootstrap_servers=INSIGHTS_KAFKA_ADDRESS)
    try:
        await self.producer.start()
    except (KafkaConnectionError, TimeoutError, Exception):
        KAFKA_ERRORS.inc()
        self.should_run = False
        await self.producer.stop()
        stop_all_event_loops()
        raise KafkaMsgHandlerError(
            format_message(
                self.prefix,
                'Unable to connect to kafka server. Closing producer.',
                account_number=self.account_number,
                report_platform_id=self.report_platform_id))
    try:
        validation = {
            'hash': file_hash,
            'request_id': self.report_or_slice.request_id,
            'validation': self.status
        }
        msg = bytes(json.dumps(validation), 'utf-8')
        await self.producer.send_and_wait(VALIDATION_TOPIC, msg)
        LOG.info(
            format_message(
                self.prefix,
                'Sent %s validation status to file upload on kafka' % self.status,
                account_number=self.account_number,
                report_platform_id=self.report_platform_id))
    except Exception as err:  # pylint: disable=broad-except
        KAFKA_ERRORS.inc()
        LOG.error(
            format_message(self.prefix,
                           'The following error occurred: %s' % err))
        stop_all_event_loops()
    finally:
        await self.producer.stop()
def determine_retry(self, fail_state, current_state,
                    candidate_hosts=None, retry_type=Report.TIME):
    """Determine if yupana should archive a report based on retry count.

    :param fail_state: <str> the final state if we have reached max retries
    :param current_state: <str> the current state we are in that we want to try again
    :param candidate_hosts: <list> the updated list of hosts that are still candidates
    :param retry_type: <str> either 'time' or 'commit'
    """
    if (self.report_or_slice.retry_count + 1) >= RETRIES_ALLOWED:
        LOG.error(format_message(
            self.prefix,
            'This %s has reached the retry limit of %s.'
            % (self.object_prefix.lower(), str(RETRIES_ALLOWED)),
            account_number=self.account_number,
            report_platform_id=self.report_platform_id))
        self.next_state = fail_state
        candidates = None
        failed = None
        if self.candidate_hosts:
            self.move_candidates_to_failed()
            candidates = self.candidate_hosts
            failed = self.failed_hosts
        options = {'retry': RETRY.increment,
                   'retry_type': retry_type,
                   'candidate_hosts': candidates,
                   'failed_hosts': failed,
                   'ready_to_archive': True}
        self.update_object_state(options=options)
    else:
        self.next_state = current_state
        if retry_type == self.object_class.GIT_COMMIT:
            COMMIT_RETRIES.labels(account_number=self.account_number).inc()
            log_message = \
                'Saving the %s to retry when a new commit '\
                'is pushed. Retries: %s' % (self.object_prefix.lower(),
                                            str(self.report_or_slice.retry_count + 1))
        else:
            TIME_RETRIES.labels(account_number=self.account_number).inc()
            log_message = \
                'Saving the %s to retry in %s minutes. '\
                'Retries: %s' % (self.object_prefix.lower(),
                                 str(RETRY_TIME),
                                 str(self.report_or_slice.retry_count + 1))
        LOG.error(format_message(
            self.prefix, log_message,
            account_number=self.account_number,
            report_platform_id=self.report_platform_id))
        options = {'retry': RETRY.increment,
                   'retry_type': retry_type,
                   'candidate_hosts': candidate_hosts}
        self.update_object_state(options=options)
        self.reset_variables()
async def transition_to_hosts_uploaded(self):
    """Upload the host candidates to inventory & move to hosts_uploaded state."""
    self.prefix = 'ATTEMPTING HOST UPLOAD'
    LOG.info(
        format_message(self.prefix,
                       'Uploading hosts to inventory. State is "%s".' %
                       self.report_or_slice.state,
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))
    request_id = None
    if self.report_or_slice.report:
        request_id = self.report_or_slice.report.request_id
    try:
        if self.candidate_hosts:
            candidates = self.generate_upload_candidates()
            await self._upload_to_host_inventory_via_kafka(candidates)
            LOG.info(
                format_message(
                    self.prefix,
                    'All hosts were successfully uploaded (request_id:%s).' % request_id,
                    account_number=self.account_number,
                    report_platform_id=self.report_platform_id))
            self.next_state = ReportSlice.HOSTS_UPLOADED
            options = {'candidate_hosts': [], 'ready_to_archive': True}
            self.update_object_state(options=options)
        else:
            # no hosts were valid, so skip the upload and archive instead
            LOG.info(
                format_message(
                    self.prefix,
                    'There are no valid hosts to upload (request_id:%s).' % request_id,
                    account_number=self.account_number,
                    report_platform_id=self.report_platform_id))
            self.next_state = ReportSlice.FAILED_VALIDATION
            options = {'ready_to_archive': True}
            self.update_object_state(options=options)
            self.archive_report_and_slices()
    except Exception as error:  # pylint: disable=broad-except
        LOG.error(
            format_message(self.prefix,
                           'The following error occurred: %s.' % str(error),
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
        self.determine_retry(ReportSlice.FAILED_HOSTS_UPLOAD,
                             ReportSlice.VALIDATED,
                             retry_type=ReportSlice.TIME)
def _transform_mtu(self, host: dict):
    """Transform 'system_profile.network_interfaces[].mtu' to an integer."""
    system_profile = host.get('system_profile', {})
    network_interfaces = system_profile.get('network_interfaces')
    if not network_interfaces:
        return host

    mtu_transformed = False
    for nic in network_interfaces:
        if (
                'mtu' not in nic
                or not nic['mtu']
                or isinstance(nic['mtu'], int)):
            continue
        nic['mtu'] = int(nic['mtu'])
        mtu_transformed = True

    if mtu_transformed:
        LOG.info(
            format_message(
                self.prefix,
                "Transformed mtu value to integer for host with FQDN '%s'"
                % (host.get('fqdn', '')),
                account_number=self.account_number,
                report_platform_id=self.report_platform_id))
    return host
def _validate_report_details(self):  # pylint: disable=too-many-locals
    """
    Verify that the report contents are a valid marketplace report.

    :returns: True if the report structure contains all required fields.
    """
    self.prefix = 'VALIDATE REPORT STRUCTURE'
    required_keys = ['report_slice_id']
    missing_keys = []
    for key in required_keys:
        required_key = self.report_json.get(key)
        if not required_key:
            missing_keys.append(key)

    if missing_keys:
        missing_keys_str = ', '.join(missing_keys)
        raise MKTReportException(
            format_message(self.prefix,
                           'Report is missing required fields: %s.' % missing_keys_str,
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
    return True
def assign_object(self):
    """Assign the object processor objects that are saved in the db.

    First priority is the oldest object in any state. We check to see if an
    appropriate amount of time has passed or code has changed before we retry this object.

    If none of the above qualify, we look for the oldest objects that are in the new state.
    """
    self.prefix = 'ASSIGNING %s' % self.object_prefix
    object_found_message = 'Starting %s processor. State is "%s".'
    if self.report_or_slice is None:
        assigned = False
        oldest_object_to_retry = self.get_oldest_object_to_retry()
        if oldest_object_to_retry:
            assigned = True
            self.report_or_slice = oldest_object_to_retry
            self.next_state = oldest_object_to_retry.state
            LOG.info(
                format_message(self.prefix,
                               object_found_message % (self.object_prefix.lower(),
                                                       self.report_or_slice.state),
                               account_number=self.account_number,
                               report_platform_id=self.report_or_slice.report_platform_id))
            options = {'retry': RETRY.keep_same}
            self.update_object_state(options=options)
        else:
            new_object = self.get_new_record()
            if new_object:
                assigned = True
                self.report_or_slice = new_object
                LOG.info(
                    format_message(self.prefix,
                                   object_found_message % (self.object_prefix.lower(),
                                                           self.report_or_slice.state),
                                   account_number=self.account_number,
                                   report_platform_id=self.report_or_slice.report_platform_id))
                self.transition_to_started()
        if not assigned:
            object_not_found_message = \
                'No %s to be processed at this time. '\
                'Checking again in %s seconds.' \
                % (self.object_prefix.lower() + 's', str(NEW_REPORT_QUERY_INTERVAL))
            LOG.info(format_message(self.prefix, object_not_found_message))
async def run(self):
    """Run the garbage collector in a loop.

    Later, if we find that we want to stop looping, we can
    manipulate the class variable should_run.
    """
    LOG.info(format_message(
        self.prefix,
        'should_run value: %s and GARBAGE_COLLECTION_INTERVAL: %s'
        % (self.should_run, GARBAGE_COLLECTION_INTERVAL)))
    while self.should_run:
        self.remove_outdated_archives()
        LOG.info(
            format_message(
                self.prefix,
                'Going to sleep. '
                'Will check again for outdated archives in %s seconds.'
                % int(GARBAGE_COLLECTION_INTERVAL)))
        await asyncio.sleep(GARBAGE_COLLECTION_INTERVAL)
def unpack_consumer_record(self, consumer_record):
    """Decode the uploaded message and return it in JSON format."""
    self.prefix = 'NEW REPORT UPLOAD'
    try:
        json_message = json.loads(consumer_record.value.decode('utf-8'))
        message = 'received on %s topic' % consumer_record.topic
        # rh_account is being deprecated so we use it as a backup if
        # account is not there
        rh_account = json_message.get('rh_account')
        self.account_number = json_message.get('account', rh_account)
        LOG.info(format_message(self.prefix,
                                message,
                                account_number=self.account_number))
        LOG.debug(format_message(
            self.prefix,
            'Message: %s' % str(consumer_record),
            account_number=self.account_number))
        return json_message
    except ValueError:
        raise QPCKafkaMsgException(format_message(
            self.prefix, 'Upload service message not JSON.'))
def remove_outdated_archives(self):
    """Query for archived reports and delete them if they have come of age."""
    current_time = datetime.now(pytz.utc)
    created_time_limit = current_time - timedelta(
        seconds=ARCHIVE_RECORD_RETENTION_PERIOD)
    # we only have to delete the archived reports because deleting an archived report deletes
    # all of the associated archived report slices
    outdated_report_archives = ReportArchive.objects.filter(
        processing_end_time__lte=created_time_limit)
    if outdated_report_archives:
        _, deleted_info = outdated_report_archives.delete()
        report_total = deleted_info.get('api.ReportArchive')
        report_slice_total = deleted_info.get('api.ReportSliceArchive')
        LOG.info(
            format_message(
                self.prefix,
                'Deleted %s archived report(s) & '
                '%s archived report slice(s) older than %s seconds.'
                % (report_total, report_slice_total,
                   int(ARCHIVE_RECORD_RETENTION_PERIOD))))
    else:
        LOG.info(
            format_message(self.prefix, 'No archived reports to delete.'))
def check_if_url_expired(self, url, request_id):
    """Validate if url is expired."""
    self.prefix = 'NEW REPORT VALIDATION'
    parsed_url_query = parse_qs(urlparse(url).query)
    creation_timestamp = parsed_url_query['X-Amz-Date']
    expire_time = timedelta(seconds=int(parsed_url_query['X-Amz-Expires'][0]))
    creation_datatime = datetime.strptime(str(creation_timestamp[0]), '%Y%m%dT%H%M%SZ')

    if datetime.now().replace(microsecond=0) > (creation_datatime + expire_time):
        raise QPCKafkaMsgException(
            format_message(self.prefix,
                           'Request_id = %s is already expired and cannot be processed: '
                           'Creation time = %s, Expiry interval = %s.'
                           % (request_id, creation_datatime, expire_time)))
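# Illustrative sketch (not part of the service code): how the presigned-URL expiry
# check above behaves. The URL and timestamps below are made up for the example;
# only urlparse/parse_qs/datetime/timedelta from the standard library are assumed.
#
#     from datetime import datetime, timedelta
#     from urllib.parse import parse_qs, urlparse
#
#     example_url = ('https://example.com/bucket/report.tar.gz'
#                    '?X-Amz-Date=20210101T120000Z&X-Amz-Expires=86400')
#     query = parse_qs(urlparse(example_url).query)
#     created = datetime.strptime(query['X-Amz-Date'][0], '%Y%m%dT%H%M%SZ')
#     expires = timedelta(seconds=int(query['X-Amz-Expires'][0]))
#     # The URL counts as expired once "now" passes creation time + expiry window.
#     print(datetime.now().replace(microsecond=0) > (created + expires))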
def _transform_os_release(self, host: dict):
    """Transform 'system_profile.os_release' label."""
    system_profile = host.get('system_profile', {})
    os_release = system_profile.get('os_release')
    if not isinstance(os_release, str):
        return host

    os_details = self._match_regex_and_find_os_details(os_release)
    if not os_details or not os_details['major']:
        del host['system_profile']['os_release']
        LOG.info(
            format_message(
                self.prefix,
                "Removed empty os_release fact for host with FQDN '%s'"
                % (host.get('fqdn', '')),
                account_number=self.account_number,
                report_platform_id=self.report_platform_id))
        return host

    host['system_profile']['os_release'] = os_details['version']
    host['system_profile']['operating_system'] = {
        'major': os_details['major'],
        'minor': os_details['minor']
    }
    if 'Red Hat' in os_details['name']:
        host['system_profile']['operating_system']['name'] = 'RHEL'

    if os_release == os_details['version']:
        return host

    LOG.info(
        format_message(self.prefix,
                       "os_release transformed '%s' -> '%s'"
                       % (os_release, os_details['version']),
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))
    return host
def log_time_stats(self, archived_rep):
    """Log the start/completion and processing times of the report."""
    arrival_time = archived_rep.arrival_time
    processing_start_time = archived_rep.processing_start_time
    processing_end_time = archived_rep.processing_end_time
    # format arrival_time
    arrival_date_time = '{}: {}:{}:{:.2f}'.format(
        arrival_time.date(),
        arrival_time.hour,
        arrival_time.minute,
        arrival_time.second)
    completion_date_time = '{}: {}:{}:{:.2f}'.format(
        processing_end_time.date(),
        processing_end_time.hour,
        processing_end_time.minute,
        processing_end_time.second)
    # time in queue & processing, broken down into hours/minutes/seconds
    queue_seconds = (processing_start_time - arrival_time).total_seconds()
    total_hours_in_queue = int(queue_seconds / 3600)
    total_minutes_in_queue = int((queue_seconds % 3600) / 60)
    total_seconds_in_queue = int(queue_seconds % 60)
    time_in_queue = '{}h {}m {}s'.format(
        total_hours_in_queue,
        total_minutes_in_queue,
        total_seconds_in_queue)
    processing_seconds = (processing_end_time - processing_start_time).total_seconds()
    total_processing_hours = int(processing_seconds / 3600)
    total_processing_minutes = int((processing_seconds % 3600) / 60)
    total_processing_seconds = int(processing_seconds % 60)
    time_processing = '{}h {}m {}s'.format(
        total_processing_hours,
        total_processing_minutes,
        total_processing_seconds)
    total_processing_time_in_seconds = int(processing_seconds)
    REPORT_PROCESSING_LATENCY.observe(total_processing_time_in_seconds)
    report_time_facts = '\nArrival date & time: {} '\
        '\nTime spent in queue: {}'\
        '\nTime spent processing report: {}'\
        '\nCompletion date & time: {}'.format(
            arrival_date_time,
            time_in_queue,
            time_processing,
            completion_date_time)
    LOG.info(format_message('REPORT TIME STATS',
                            report_time_facts,
                            account_number=self.account_number,
                            report_platform_id=self.report_platform_id))
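# Illustrative sketch (assumption, not service code): the hours/minutes/seconds
# breakdown used above can also be expressed with divmod. A 2h 5m 7s delta should
# log as "2h 5m 7s" rather than double-counting the hours as minutes ("2h 125m 7s").
#
#     from datetime import timedelta
#
#     delta = timedelta(hours=2, minutes=5, seconds=7)
#     minutes, seconds = divmod(int(delta.total_seconds()), 60)
#     hours, minutes = divmod(minutes, 60)
#     print('{}h {}m {}s'.format(hours, minutes, seconds))  # -> 2h 5m 7s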
def _validate_report_hosts(self, report_slice_id, source_metadata):
    """Verify that report hosts contain canonical facts.

    :returns: tuple containing valid & invalid hosts
    """
    hosts = self.report_json.get('hosts', [])

    prefix = 'VALIDATE HOSTS'
    candidate_hosts = []
    hosts_without_facts = []
    for host in hosts:
        host_uuid = str(uuid.uuid4())
        host['account'] = self.account_number
        host_facts = host.get('facts', [])
        host_facts.append({
            'namespace': 'yupana',
            'facts': {
                'yupana_host_id': host_uuid,
                'report_platform_id': str(self.report_platform_id),
                'report_slice_id': str(report_slice_id),
                'account': self.account_number,
                'source': self.report_or_slice.source
            }
        })
        host['stale_timestamp'] = self.get_stale_time()
        host['reporter'] = 'yupana'
        host['facts'] = host_facts
        found_facts = False
        for fact in CANONICAL_FACTS:
            if host.get(fact):
                found_facts = True
                break
        if not found_facts:
            INVALID_HOSTS.labels(account_number=self.account_number,
                                 source=self.report_or_slice.source).inc()
            hosts_without_facts.append({host_uuid: host})
        candidate_hosts.append({host_uuid: host})
    if hosts_without_facts:
        invalid_hosts_message = \
            '%d host(s) found that contain(s) 0 canonical facts: %s. '\
            'Source metadata: %s' % (len(hosts_without_facts),
                                     hosts_without_facts,
                                     source_metadata)
        LOG.warning(
            format_message(prefix,
                           invalid_hosts_message,
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))

    return candidate_hosts, hosts_without_facts
async def _upload_to_object_storage(self):
    """Upload the metrics to object storage."""
    self.prefix = 'UPLOAD METRICS TO OBJECT STORAGE'

    minio_client = self.get_minio_client()
    if minio_client is None:
        raise RetryUploadTimeException(
            'Connection to object storage is not configured.')

    bucket_exists = minio_client.bucket_exists(bucket_name=MINIO_BUCKET)
    if not bucket_exists:
        raise RetryUploadTimeException(
            f'Object storage bucket {MINIO_BUCKET} does not exist.')

    LOG.info(
        format_message(self.prefix,
                       'Sending %s metrics to object storage.'
                       % (self.report_slice_id),
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))

    metric_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
    cluster_id = self.report_or_slice.source
    metric_file_name = f'{self.account_number}/{cluster_id}-{self.report_slice_id}.json'
    with metric_file:
        if self.report_json is None:
            self.report_json = json.loads(self.report_or_slice.report_json)
        metadata = {}
        metadata_str = self.report_or_slice.source_metadata
        if metadata_str:
            metadata = json.loads(metadata_str)
        metadata.update({
            'account': self.account_number,
            'platform_id': str(self.report_platform_id),
            'source': str(cluster_id)
        })
        self.report_json['metadata'] = metadata
        json.dump(self.report_json, metric_file)

    try:
        minio_client.fput_object(bucket_name=MINIO_BUCKET,
                                 object_name=metric_file_name,
                                 file_path=metric_file.name)
    except (ResponseError, Exception) as err:  # pylint: disable=broad-except
        os.remove(metric_file.name)
        raise err

    os.remove(metric_file.name)
def remove_outdated_archives(self):
    """Query for archived reports and delete them if they have come of age."""
    try:
        current_time = datetime.now(pytz.utc)
        created_time_limit = current_time - timedelta(seconds=ARCHIVE_RECORD_RETENTION_PERIOD)
        # we only have to delete the archived reports because deleting an archived report
        # deletes all of the associated archived report slices
        outdated_report_archives = ReportArchive.objects.filter(
            processing_end_time__lte=created_time_limit)
        if outdated_report_archives:
            report_total = 0
            report_slice_total = 0
            for outdated_report in outdated_report_archives.iterator(
                    chunk_size=CHUNK_SIZE_FOR_REPORTS):
                _, deleted_info = outdated_report.delete()
                report_total += deleted_info.get('api.ReportArchive')
                report_slice_total += deleted_info.get('api.ReportSliceArchive')
            LOG.info(format_message(
                self.prefix,
                'Deleted %s archived report(s) & '
                '%s archived report slice(s) older than %s seconds.'
                % (report_total, report_slice_total,
                   int(ARCHIVE_RECORD_RETENTION_PERIOD))))
        else:
            LOG.info(
                format_message(
                    self.prefix,
                    'No archived reports to delete.'))
    except Exception as error:  # pylint: disable=broad-except
        DB_ERRORS.inc()
        LOG.error(
            format_message(
                self.prefix,
                'Could not remove outdated archives '
                'due to the following error: %s.' % str(error)))
def _remove_empty_mac_addresses(self, host: dict):
    """Remove the 'mac_addresses' field if it is empty."""
    mac_addresses = host.get('mac_addresses')
    if mac_addresses is None or mac_addresses:
        return host

    del host['mac_addresses']
    LOG.info(
        format_message(
            self.prefix,
            "Removed empty mac_addresses fact for host with FQDN '%s'"
            % (host.get('fqdn', '')),
            account_number=self.account_number,
            report_platform_id=self.report_platform_id))
    return host
def deduplicate_reports(self):
    """If a report with the same id already exists, archive the new report."""
    try:
        existing_reports = Report.objects.filter(
            report_platform_id=self.report_platform_id)
        if existing_reports.count() > 1:
            LOG.error(
                format_message(
                    self.prefix,
                    'a report with the report_platform_id %s already exists.'
                    % self.report_or_slice.report_platform_id,
                    account_number=self.account_number,
                    report_platform_id=self.report_platform_id))
            self.archive_report_and_slices()
    except Report.DoesNotExist:
        pass
def _download_report(self):
    """
    Download report.

    :returns content: The tar binary content or None if there are errors.
    """
    self.prefix = 'REPORT DOWNLOAD'
    try:
        report_url = self.upload_message.get('url', None)
        if not report_url:
            raise FailDownloadException(
                format_message(
                    self.prefix,
                    'kafka message missing report url. Message: %s' % self.upload_message,
                    account_number=self.account_number))

        LOG.info(format_message(self.prefix,
                                'downloading %s' % report_url,
                                account_number=self.account_number))
        download_response = requests.get(report_url)
        if download_response.status_code != HTTPStatus.OK:
            raise RetryDownloadException(
                format_message(
                    self.prefix,
                    'HTTP status code %s returned for URL %s. Message: %s'
                    % (download_response.status_code,
                       report_url,
                       self.upload_message),
                    account_number=self.account_number))

        LOG.info(format_message(
            self.prefix,
            'successfully downloaded TAR %s' % report_url,
            account_number=self.account_number))
        return download_response.content
    except FailDownloadException as fail_err:
        raise fail_err
    except requests.exceptions.HTTPError as err:
        raise RetryDownloadException(
            format_message(self.prefix,
                           'Unexpected http error for URL %s. Error: %s'
                           % (report_url, err),
                           account_number=self.account_number))
    except Exception as err:
        raise RetryDownloadException(
            format_message(self.prefix,
                           'Unexpected error for URL %s. Error: %s'
                           % (report_url, err),
                           account_number=self.account_number))
def _transform_os_release(self, host: dict, transformed_obj=None):
    """Transform 'system_profile.os_release' label."""
    if transformed_obj is None:
        # A mutable default such as copy.deepcopy(TRANSFORMED_DICT) is evaluated only
        # once at definition time and shared across calls, so build a fresh copy here.
        transformed_obj = copy.deepcopy(TRANSFORMED_DICT)
    system_profile = host.get('system_profile', {})
    os_release = system_profile.get('os_release')
    if not isinstance(os_release, str):
        return [host, transformed_obj]

    os_details = self._match_regex_and_find_os_details(os_release)

    LOG.info(
        format_message(self.prefix,
                       "os version after parsing os_release: '%s'" % os_details,
                       account_number=self.account_number,
                       report_platform_id=self.report_platform_id))

    if not os_details or not os_details['major']:
        del host['system_profile']['os_release']
        transformed_obj['removed'].append('empty os_release')
        return [host, transformed_obj]

    host['system_profile']['os_release'] = os_details['version']

    os_enum = next((value for key, value in OS_VS_ENUM.items()
                    if key.lower() in os_details['name'].lower()), None)
    if os_enum:
        host['system_profile']['operating_system'] = {
            'major': os_details['major'],
            'minor': os_details['minor'],
            'name': os_enum
        }
    else:
        transformed_obj['missing_data'].append(
            "operating system info for os release '%s'" % os_release)

    if os_release == os_details['version']:
        return [host, transformed_obj]

    transformed_obj['modified'].append(
        "os_release from '%s' to '%s'" % (os_release, os_details['version']))

    return [host, transformed_obj]
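# Illustrative sketch (assumption, not service code): why the transformed_obj default
# is created inside the function body above. A mutable default argument is evaluated
# once, so every call without an explicit argument would share and keep mutating the
# same dict. TRANSFORMED_EXAMPLE and _shared_default are hypothetical names.
#
#     import copy
#
#     TRANSFORMED_EXAMPLE = {'removed': [], 'modified': [], 'missing_data': []}
#
#     def _shared_default(obj=copy.deepcopy(TRANSFORMED_EXAMPLE)):
#         obj['modified'].append('change')
#         return obj
#
#     print(len(_shared_default()['modified']))  # -> 1
#     print(len(_shared_default()['modified']))  # -> 2, the default dict is shared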
def _transform_os_kernel_version(self, host: dict):
    """Transform 'system_profile.os_kernel_version' label."""
    system_profile = host.get('system_profile', {})
    os_kernel_version = system_profile.get('os_kernel_version')

    if not isinstance(os_kernel_version, str):
        return host

    version_value = os_kernel_version.split('-')[0]
    host['system_profile']['os_kernel_version'] = version_value

    LOG.info(
        format_message(
            self.prefix,
            "os_kernel_version transformed '%s' -> '%s' for host with FQDN '%s'"
            % (os_kernel_version, version_value, host.get('fqdn', '')),
            account_number=self.account_number,
            report_platform_id=self.report_platform_id))

    return host
def _print_transformed_info(self, request_id, host_id, transformed_obj):
    """Print transformed logs."""
    if transformed_obj is None:
        return

    log_sections = []
    for key, value in transformed_obj.items():
        if value:
            log_sections.append('%s: %s' % (key, ','.join(value)))

    if log_sections:
        log_message = (
            'Transformed details for host with id %s (request_id: %s):\n'
            % (host_id, request_id))
        log_message += '\n'.join(log_sections)
        LOG.info(
            format_message(self.prefix,
                           log_message,
                           account_number=self.account_number,
                           report_platform_id=self.report_platform_id))
def get_oldest_object_to_retry(self):
    """Grab the oldest report or report slice object to retry.

    :returns: object to retry or None.
    """
    status_info = Status()
    current_time = datetime.now(pytz.utc)
    objects_count = self.calculate_queued_objects(current_time, status_info)
    if self.object_class == Report:
        QUEUED_REPORTS.set(objects_count)
    else:
        QUEUED_REPORT_SLICES.set(objects_count)
    LOG.info(
        format_message(
            self.prefix,
            'Number of %s waiting to be processed: %s'
            % (self.object_prefix.lower() + 's', objects_count)))
    # first we have to query for all objects with commit retries
    commit_retry_query = self.object_class.objects.filter(
        retry_type=self.object_class.GIT_COMMIT)
    # then we grab the oldest object from the query
    oldest_commit_object = self.return_queryset_object(queryset=commit_retry_query)
    if oldest_commit_object:
        same_commit = oldest_commit_object.git_commit == status_info.git_commit
        if not same_commit:
            return oldest_commit_object
    # If the above doesn't return, we should query for all time retries
    time_retry_query = self.object_class.objects.filter(
        retry_type=self.object_class.TIME)
    oldest_time_object = self.return_queryset_object(queryset=time_retry_query)
    if oldest_time_object:
        minutes_passed = int(
            (current_time - oldest_time_object.last_update_time).total_seconds() / 60)
        if minutes_passed >= RETRY_TIME:
            return oldest_time_object
    # if we haven't returned a retry object, return None
    return None