def validate_job(self, job_id):
    """ Runs the validations for a job and records when it was last validated

        Args:
            job_id: Database ID for the validation Job

        Returns:
            The response built by JsonResponse.create with StatusCode.OK

        Raises:
            ResponseException: if the job is not in the database, its prerequisites are not complete, or its job
                type is neither 'csv_record_validation' nor 'validation'
    """
    # Create connection to job tracker database
    sess = GlobalDB.db().session

    # Get the job
    job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
    if job is None:
        raise ResponseException('Job ID {} not found in database'.format(job_id), StatusCode.CLIENT_ERROR, None,
                                ValidationError.jobError)

    # Make sure job's prerequisites are complete
    if not run_job_checks(job_id):
        validation_error_type = ValidationError.jobError
        write_file_error(job_id, None, validation_error_type)
        raise ResponseException('Prerequisites for Job ID {} are not complete'.format(job_id),
                                StatusCode.CLIENT_ERROR, None, validation_error_type)

    # Make sure this is a validation job and pick the matching routine in the same step, so the list of supported
    # job types lives in exactly one place
    job_type_name = job.job_type.name
    if job_type_name == 'csv_record_validation':
        run_validations = self.run_validation
    elif job_type_name == 'validation':
        run_validations = self.run_cross_validation
    else:
        validation_error_type = ValidationError.jobError
        write_file_error(job_id, None, validation_error_type)
        raise ResponseException(
            'Job ID {} is not a validation job (job type is {})'.format(job_id, job_type_name),
            StatusCode.CLIENT_ERROR, None, validation_error_type)

    # Set job status to running and do validations
    mark_job_status(job_id, "running")
    run_validations(job)

    # Update last validated date
    job.last_validated = datetime.utcnow()
    sess.commit()
    return JsonResponse.create(StatusCode.OK, {"message": "Validation complete"})
def validate_job(self, job_id):
    """ Runs the validations for a job and records when it was last validated

        Args:
            job_id: Database ID of the Job to validate

        Returns:
            The response built by JsonResponse.create with StatusCode.OK

        Raises:
            ResponseException: if the job is not in the database, its prerequisites are not complete, or its job
                type is neither 'csv_record_validation' nor 'validation'
    """
    # Create connection to job tracker database
    sess = GlobalDB.db().session

    # Get the job
    job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
    if job is None:
        validation_error_type = ValidationError.jobError
        # NOTE(review): this records a file error against a job ID that was just found to be missing - confirm
        # write_file_error tolerates that
        write_file_error(job_id, None, validation_error_type)
        raise ResponseException('Job ID {} not found in database'.format(job_id), StatusCode.CLIENT_ERROR, None,
                                validation_error_type)

    # Make sure job's prerequisites are complete
    if not run_job_checks(job_id):
        validation_error_type = ValidationError.jobError
        write_file_error(job_id, None, validation_error_type)
        raise ResponseException('Prerequisites for Job ID {} are not complete'.format(job_id),
                                StatusCode.CLIENT_ERROR, None, validation_error_type)

    # Make sure this is a validation job and choose its routine in one branch; the previous separate dispatch
    # repeated the same literals and carried a fallback that could never execute
    job_type_name = job.job_type.name
    if job_type_name == 'csv_record_validation':
        run_validations = self.run_validation
    elif job_type_name == 'validation':
        run_validations = self.run_cross_validation
    else:
        validation_error_type = ValidationError.jobError
        write_file_error(job_id, None, validation_error_type)
        raise ResponseException(
            'Job ID {} is not a validation job (job type is {})'.format(job_id, job_type_name),
            StatusCode.CLIENT_ERROR, None, validation_error_type)

    # Set job status to running and do validations
    mark_job_status(job_id, "running")
    run_validations(job)

    # Update last validated date
    job.last_validated = datetime.utcnow()
    sess.commit()
    return JsonResponse.create(StatusCode.OK, {"message": "Validation complete"})
def validator_process_job(job_id, agency_code, is_retry=False):
    """ Retrieves a Job based on its ID, and kicks off a validation. Handles errors by ensuring the Job (if exists) is
        no longer running.

        Args:
            job_id: ID of a Job
            agency_code: CGAC or FREC code for agency, only required for file generations by Job
            is_retry: If this is not the very first time handling execution of this job. If True, cleanup is
                performed before proceeding to retry the job

        Raises:
            Any Exceptions raised by the GenerationManager or ValidationManager, excluding those explicitly handled.
            ResponseException, csv.Error, UnicodeDecodeError and ValueError are handled (the Job is marked
            'invalid') when the Job was found; if the Job was not found they are logged and re-raised.
    """
    if is_retry:
        if cleanup_validation(job_id):
            log_job_message(
                logger=logger,
                message="Attempting a retry of {} after successful retry-cleanup.".format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
        else:
            log_job_message(
                logger=logger,
                message="Retry of {} found to be not necessary after cleanup. "
                        "Returning from job with success.".format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
            return

    sess = GlobalDB.db().session
    job = None

    try:
        # Get the job
        job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
        if job is None:
            validation_error_type = ValidationError.jobError
            write_file_error(job_id, None, validation_error_type)
            raise ResponseException('Job ID {} not found in database'.format(job_id), StatusCode.CLIENT_ERROR, None,
                                    validation_error_type)

        mark_job_status(job_id, 'ready')

        # We can either validate or generate a file based on Job ID
        if job.job_type.name == 'file_upload':
            # Generate A, E, or F file
            file_generation_manager = FileGenerationManager(sess, g.is_local, job=job)
            file_generation_manager.generate_file(agency_code)
        else:
            # Run validations
            validation_manager = ValidationManager(g.is_local, CONFIG_SERVICES['error_report_path'])
            validation_manager.validate_job(job.job_id)

    except (ResponseException, csv.Error, UnicodeDecodeError, ValueError) as e:
        # Handle exceptions explicitly raised during validation
        error_data = {
            'message': 'An exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }
        if job:
            error_data.update({
                'submission_id': job.submission_id,
                # A Job may have no file type; reading .name unguarded would raise here and mask the real error
                'file_type': job.file_type.name if job.file_type else None
            })
        logger.error(error_data)

        # Without a Job there is nothing to mark invalid, so let the caller deal with the exception
        if not job:
            raise e

        sess.refresh(job)
        # NOTE(review): no explicit commit follows; presumably write_file_error/mark_job_status persist this - confirm
        job.error_message = str(e)
        if job.filename is not None:
            error_type = ValidationError.unknownError
            extra_info = None
            if isinstance(e, UnicodeDecodeError):
                error_type = ValidationError.encodingError
            elif isinstance(e, ResponseException):
                error_type = e.errorType
                # Keep the exception's extra details with the file error instead of dropping them
                extra_info = e.extraInfo

            write_file_error(job.job_id, job.filename, error_type, extra_info)

        mark_job_status(job.job_id, 'invalid')

    except Exception as e:
        # Log uncaught exceptions and fail the Job
        error_data = {
            'message': 'An unhandled exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }
        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name if job.file_type else None
            })
        logger.error(error_data)

        # Try to mark the Job as failed, but continue raising the original Exception if not possible
        try:
            mark_job_status(job_id, 'failed')

            # The Job may never have been loaded (e.g. the lookup itself failed)
            if job is not None:
                sess.refresh(job)
                job.error_message = str(e)
                sess.commit()
        except Exception:
            # Best effort only: record why the cleanup failed rather than silently discarding it
            logger.error({
                'message': 'Unable to mark the Job as failed after an unhandled exception',
                'message_type': 'ValidatorInfo',
                'job_id': job_id,
                'traceback': traceback.format_exc()
            })

        raise e
def run_app():
    """ Run the application: poll SQS forever and validate or generate a file for each job ID received.

        Each message body is treated as a Job ID. Jobs with a submission ID that either have no file type, have
        file type A/B/C/FABS, or are not 'file_upload' jobs are validated; every other job goes through file
        generation. Messages are deleted once processed; whatever is left in the batch is made visible again
        immediately so it can be retried.

        This function does not return; it loops until the process is stopped or an error escapes the handlers.
    """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app, tracer, service="broker-dd", distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        # NOTE(review): this handle is reused after GlobalDB.close() runs at the end of every pass - confirm it
        # stays valid across iterations
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            # Likewise start from an empty batch, so a failed receive never makes the cleanup below touch
            # messages that belong to an earlier pass
            messages = []
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(job_id=g.job_id).one_or_none()
                    if job is None:
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException('Job ID {} not found in database'.format(g.job_id),
                                                StatusCode.CLIENT_ERROR, None, validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name in ['A', 'B', 'C', 'FABS'] or
                            job.job_type.name != 'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()

            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere: mark job as
                        # 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            # These errors are tied to the file itself, so the message is removed instead of
                            # being left on the queue for another attempt
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()

            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, (ValueError, csv.Error, UnicodeDecodeError)):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)

            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass
def run_app():
    """ Run the application: poll SQS forever and validate the job named by each message received.

        Each message body is treated as a Job ID and handed to ValidationManager.validate_job. Messages are
        deleted once processed (or when the failure is one that is handled by deleting the message); whatever is
        left in the batch is made visible again immediately so it can be retried.

        This function does not return; it loops until the process is stopped or an error escapes the handlers.
    """
    app = Flask(__name__)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        while True:
            # Reset per-pass state: a stale current_message could otherwise be deleted by an error handler of a
            # later pass, and an unbound `messages` would break the cleanup if the very first receive failed
            current_message = None
            messages = []
            # Identity of every message deleted during this pass; those must not be touched again afterwards
            deleted_ids = set()
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10)
                for message in messages:
                    logger.info("Message received: %s", message.body)
                    current_message = message
                    GlobalDB.db()
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")
                    validation_manager = ValidationManager(local, error_report_path)
                    validation_manager.validate_job(g.job_id)

                    # delete from SQS once processed
                    message.delete()
                    deleted_ids.add(id(message))

            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(str(e))

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # job pass prerequisites for validation, but an error
                        # happened somewhere. mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
                                deleted_ids.add(id(current_message))

            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(str(e))

                # csv-specific errors get a different job status and response code
                if isinstance(e, (ValueError, csv.Error, UnicodeDecodeError)):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                                deleted_ids.add(id(current_message))
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)

            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    # Skip messages deleted above: there is nothing left to retry, and the call is expected to
                    # error for a message that no longer exists (TODO confirm), which would stop the polling loop
                    if id(message) in deleted_ids:
                        continue
                    message.change_visibility(VisibilityTimeout=0)