def validate_threaded():
    """Start the validation process on a new thread.

    Flask route handler: resolves the job ID from the incoming request,
    verifies the job is ready to run, marks it as "running" in the job
    tracker, then launches the actual validation on a background thread so
    the HTTP response can return immediately.

    Returns:
        A JsonResponse: OK with the job table name on success, or an error
        response describing the failure.
    """
    @copy_current_request_context
    def ThreadedFunction(arg):
        """The new thread: runs the validation for one job ID."""
        threadedManager = ValidationManager(local, error_report_path)
        threadedManager.threadedValidateJob(arg)

    def _log_error(message):
        """Append an error line to the local error log.

        Uses a context manager so the file handle is closed (the original
        `open(...).write(...)` pattern leaked handles on every error).
        """
        with open("errorLog", "a") as error_log:
            error_log.write(str(message) + "\n")

    try:
        interfaces = InterfaceHolder()
        jobTracker = interfaces.jobDb
    except ResponseException as e:
        _log_error(e)
        return JsonResponse.error(e, e.status, table="cannot connect to job database")
    except Exception as e:
        _log_error(e)
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        return JsonResponse.error(exc, exc.status, table="cannot connect to job database")

    jobId = None
    manager = ValidationManager(local, error_report_path)

    try:
        jobId = manager.getJobID(request)
    except ResponseException as e:
        # Could not determine the job ID from the request; mark as invalid.
        manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        return JsonResponse.error(e, e.status, table="")
    except Exception as e:
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
        manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
        CloudLogger.logError(str(e), exc, traceback.extract_tb(sys.exc_info()[2]))
        return JsonResponse.error(exc, exc.status, table="")

    try:
        manager.testJobID(jobId, interfaces)
    except ResponseException as e:
        _log_error(e)
        # Job is not ready to run according to job tracker, do not change
        # status of job in job tracker
        interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
        return JsonResponse.error(e, e.status, table="")
    except Exception as e:
        _log_error(e)
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
        interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
        return JsonResponse.error(exc, exc.status, table="")

    thread = Thread(target=ThreadedFunction, args=(jobId,))

    try:
        jobTracker.markJobStatus(jobId, "running")
    except Exception as e:
        _log_error(e)
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        return JsonResponse.error(exc, exc.status, table="could not start job")

    # Close DB interfaces before handing off to the worker thread.
    interfaces.close()
    thread.start()
    return JsonResponse.create(StatusCode.OK, {"table": "job" + str(jobId)})
def validator_process_job(job_id, agency_code, is_retry=False):
    """ Retrieves a Job based on its ID, and kicks off a validation. Handles errors by ensuring the Job (if exists)
        is no longer running.

        Args:
            job_id: ID of a Job
            agency_code: CGAC or FREC code for agency, only required for file generations by Job
            is_retry: If this is not the very first time handling execution of this job. If True, cleanup is
                performed before proceeding to retry the job

        Raises:
            Any Exceptions raised by the GenerationManager or ValidationManager, excluding those explicitly handled
    """
    if is_retry:
        if cleanup_validation(job_id):
            log_job_message(
                logger=logger,
                message="Attempting a retry of {} after successful retry-cleanup.".format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
        else:
            # Cleanup determined there is nothing left to do for this job.
            log_job_message(
                logger=logger,
                message="Retry of {} found to be not necessary after cleanup. "
                        "Returning from job with success.".format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
            return

    sess = GlobalDB.db().session
    job = None

    try:
        # Get the job
        job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
        if job is None:
            validation_error_type = ValidationError.jobError
            write_file_error(job_id, None, validation_error_type)
            raise ResponseException('Job ID {} not found in database'.format(job_id),
                                    StatusCode.CLIENT_ERROR, None, validation_error_type)
        mark_job_status(job_id, 'ready')

        # We can either validate or generate a file based on Job ID
        if job.job_type.name == 'file_upload':
            # Generate A, E, or F file
            file_generation_manager = FileGenerationManager(sess, g.is_local, job=job)
            file_generation_manager.generate_file(agency_code)
        else:
            # Run validations
            validation_manager = ValidationManager(g.is_local, CONFIG_SERVICES['error_report_path'])
            validation_manager.validate_job(job.job_id)
    except (ResponseException, csv.Error, UnicodeDecodeError, ValueError) as e:
        # Handle exceptions explicitly raised during validation
        error_data = {
            'message': 'An exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }

        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name
            })
            logger.error(error_data)

            sess.refresh(job)
            job.error_message = str(e)
            if job.filename is not None:
                # Map the exception to a file-level error type for reporting.
                error_type = ValidationError.unknownError
                if isinstance(e, UnicodeDecodeError):
                    error_type = ValidationError.encodingError
                elif isinstance(e, ResponseException):
                    error_type = e.errorType
                write_file_error(job.job_id, job.filename, error_type)

            mark_job_status(job.job_id, 'invalid')
        else:
            logger.error(error_data)
            raise e
    except Exception as e:
        # Log uncaught exceptions and fail the Job
        error_data = {
            'message': 'An unhandled exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }
        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name
            })
        logger.error(error_data)

        # Try to mark the Job as failed, but continue raising the original Exception if not possible
        try:
            mark_job_status(job_id, 'failed')

            sess.refresh(job)
            job.error_message = str(e)
            sess.commit()
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt; the intent is only to best-effort the
            # status update while re-raising the original error below.
            pass

        raise e
def ThreadedFunction(arg):
    """Worker entry point: validate the given job on this thread."""
    validation_manager = ValidationManager(local, error_report_path)
    validation_manager.threadedValidateJob(arg)
def createApp():
    """Create the Flask app.

    Builds the validator Flask application, registers its routes, and
    configures JSON response debugging from service config.

    Returns:
        The configured Flask app.

    Raises:
        Re-raises any exception hit during app construction after logging it.
    """
    try:
        app = Flask(__name__)
        local = CONFIG_BROKER['local']
        error_report_path = CONFIG_SERVICES['error_report_path']
        app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        validationManager = ValidationManager(local, error_report_path)

        def logErrorLine(e):
            """Append an error line to the local error log, closing the handle.

            Fixes the original `open(...).write(...)` pattern, which leaked a
            file handle on every logged error.
            """
            with open("errorLog", "a") as errorLog:
                errorLog.write(str(e) + "\n")

        @app.route("/", methods=["GET"])
        def testApp():
            """Confirm server running."""
            return "Validator is running"

        @app.route("/validate_threaded/", methods=["POST"])
        def validate_threaded():
            """Start the validation process on a new thread."""
            @copy_current_request_context
            def ThreadedFunction(arg):
                """The new thread."""
                threadedManager = ValidationManager(local, error_report_path)
                threadedManager.threadedValidateJob(arg)

            try:
                interfaces = InterfaceHolder()
                jobTracker = interfaces.jobDb
            except ResponseException as e:
                logErrorLine(e)
                return JsonResponse.error(e, e.status, table="cannot connect to job database")
            except Exception as e:
                logErrorLine(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
                return JsonResponse.error(exc, exc.status, table="cannot connect to job database")

            jobId = None
            manager = ValidationManager(local, error_report_path)

            try:
                jobId = manager.getJobID(request)
            except ResponseException as e:
                manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
                manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), exc, traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(exc, exc.status, table="")

            try:
                manager.testJobID(jobId, interfaces)
            except ResponseException as e:
                logErrorLine(e)
                # Job is not ready to run according to job tracker, do not
                # change status of job in job tracker
                interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                logErrorLine(e)
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
                interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
                return JsonResponse.error(exc, exc.status, table="")

            thread = Thread(target=ThreadedFunction, args=(jobId,))

            try:
                # BUG FIX: was `jobTracker.markStatus(...)`, which does not
                # match the `markJobStatus` call used by the other
                # validate_threaded implementation in this file and would
                # raise AttributeError at runtime.
                jobTracker.markJobStatus(jobId, "running")
            except Exception as e:
                logErrorLine(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
                return JsonResponse.error(exc, exc.status, table="could not start job")

            interfaces.close()
            thread.start()
            return JsonResponse.create(StatusCode.OK, {"table": "job" + str(jobId)})

        @app.route("/validate/", methods=["POST"])
        def validate():
            """Start the validation process on the same threads."""
            interfaces = InterfaceHolder()  # Create sessions for this route
            try:
                return validationManager.validateJob(request, interfaces)
            except Exception as e:
                # Something went wrong getting the flask request
                logErrorLine(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
                return JsonResponse.error(exc, exc.status, table="")
            finally:
                interfaces.close()

        JsonResponse.debugMode = CONFIG_SERVICES['rest_trace']

        return app
    except Exception as e:
        trace = traceback.extract_tb(sys.exc_info()[2], 10)
        CloudLogger.logError('Validator App Level Error: ', e, trace)
        raise
def run_app():
    """Run the application.

    Creates the Flask app, then polls SQS forever: each message body is a
    job ID, which is either validated or used to generate a file. Errors are
    written to the error DB and job statuses updated; messages are deleted
    once processed, and unprocessed messages have their visibility reset so
    they can be retried immediately.
    """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app, tracer, service="broker-dd", distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        # Initialized before the loop so the `finally` block can safely
        # iterate it even if receive_messages() raises on the first pass.
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(job_id=g.job_id).one_or_none()
                    if job is None:
                        # Record a file-level job error, then bail via the
                        # ResponseException handler below.
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException('Job ID {} not found in database'.format(g.job_id),
                                                StatusCode.CLIENT_ERROR, None, validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name in ['A', 'B', 'C', 'FABS'] or
                            job.job_type.name != 'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()

                    sess.refresh(job)
                    # Delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)

                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere:
                        # mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                if current_message:
                    # These error types are permanent for this file, so retrying
                    # the message would not help; drop it from the queue.
                    if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                       ValidationError.fileTypeError]:
                        current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass
def setUpClass(cls):
    """ Set up class-wide resources (test data) """
    super(ErrorWarningTests, cls).setUpClass()
    # Silence noisy package loggers during the test run.
    logging.getLogger('dataactcore').setLevel(logging.ERROR)
    logging.getLogger('dataactvalidator').setLevel(logging.ERROR)

    with create_app().app_context():
        # get the submission test users
        sess = GlobalDB.db().session
        cls.session = sess

        # set up default e-mails for tests
        admin_user = sess.query(User).filter(User.email == cls.test_users['admin_user']).one()

        cls.validator = ValidationManager(directory=CONFIG_SERVICES['error_report_path'])

        # Just have one valid submission and then keep on reloading files
        cls.submission_id = insert_submission(sess, admin_user.user_id, cgac_code='SYS', start_date='01/2001',
                                              end_date='03/2001', is_quarter=True)
        cls.submission = sess.query(Submission).filter_by(submission_id=cls.submission_id).one()
        cls.val_job = insert_job(cls.session, FILE_TYPE_DICT['appropriations'], JOB_STATUS_DICT['ready'],
                                 JOB_TYPE_DICT['csv_record_validation'], cls.submission_id,
                                 filename=JOB_TYPE_DICT['csv_record_validation'])
        # Snapshot existing report files so tests can detect newly generated ones.
        cls.original_reports = set(os.listdir(CONFIG_SERVICES['error_report_path']))

        # adding TAS to ensure valid file is valid
        tas1 = TASFactory(account_num=1, allocation_transfer_agency='019', agency_identifier='072',
                          beginning_period_of_availa=None, ending_period_of_availabil=None,
                          availability_type_code='X', main_account_code='0306', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas2 = TASFactory(account_num=2, allocation_transfer_agency=None, agency_identifier='019',
                          beginning_period_of_availa='2016', ending_period_of_availabil='2016',
                          availability_type_code=None, main_account_code='0113', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas3 = TASFactory(account_num=3, allocation_transfer_agency=None, agency_identifier='028',
                          beginning_period_of_availa=None, ending_period_of_availabil=None,
                          availability_type_code='X', main_account_code='0406', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas4 = TASFactory(account_num=4, allocation_transfer_agency=None, agency_identifier='028',
                          beginning_period_of_availa='2010', ending_period_of_availabil='2011',
                          availability_type_code=None, main_account_code='0406', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas5 = TASFactory(account_num=5, allocation_transfer_agency='069', agency_identifier='013',
                          beginning_period_of_availa=None, ending_period_of_availabil=None,
                          availability_type_code='X', main_account_code='2050', sub_account_code='005',
                          internal_start_date='01-01-2000')
        tas6 = TASFactory(account_num=6, allocation_transfer_agency='028', agency_identifier='028',
                          beginning_period_of_availa=None, ending_period_of_availabil=None,
                          availability_type_code='X', main_account_code='8007', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas7 = TASFactory(account_num=7, allocation_transfer_agency=None, agency_identifier='049',
                          beginning_period_of_availa=None, ending_period_of_availabil=None,
                          availability_type_code='X', main_account_code='0100', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas8 = TASFactory(account_num=8, allocation_transfer_agency=None, agency_identifier='049',
                          beginning_period_of_availa='2010', ending_period_of_availabil='2011',
                          availability_type_code=None, main_account_code='0100', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas9 = TASFactory(account_num=9, allocation_transfer_agency=None, agency_identifier='049',
                          beginning_period_of_availa='2014', ending_period_of_availabil='2015',
                          availability_type_code=None, main_account_code='0100', sub_account_code='000',
                          internal_start_date='01-01-2000')
        tas10 = TASFactory(account_num=10, allocation_transfer_agency=None, agency_identifier='049',
                           beginning_period_of_availa='2015', ending_period_of_availabil='2016',
                           availability_type_code=None, main_account_code='0100', sub_account_code='000',
                           internal_start_date='01-01-2000')
        sess.add_all([tas1, tas2, tas3, tas4, tas5, tas6, tas7, tas8, tas9, tas10])

        # adding GTAS to ensure valid file is valid
        # Each SF133 row mirrors the matching TAS above (same agency/account
        # fields), with line=1009 for period 6 of FY 2001.
        gtas1 = SF133Factory(tas=concat_tas_dict(tas1.component_dict()), allocation_transfer_agency='019',
                             agency_identifier='072', beginning_period_of_availa=None, line=1009,
                             ending_period_of_availabil=None, availability_type_code='X',
                             main_account_code='0306', sub_account_code='000', period=6, fiscal_year=2001)
        gtas2 = SF133Factory(tas=concat_tas_dict(tas2.component_dict()), allocation_transfer_agency=None,
                             agency_identifier='019', beginning_period_of_availa='2016', line=1009,
                             ending_period_of_availabil='2016', availability_type_code=None,
                             main_account_code='0113', sub_account_code='000', period=6, fiscal_year=2001)
        gtas3 = SF133Factory(tas=concat_tas_dict(tas3.component_dict()), allocation_transfer_agency=None,
                             agency_identifier='028', beginning_period_of_availa=None, line=1009,
                             ending_period_of_availabil=None, availability_type_code='X',
                             main_account_code='0406', sub_account_code='000', period=6, fiscal_year=2001)
        gtas4 = SF133Factory(tas=concat_tas_dict(tas4.component_dict()), allocation_transfer_agency=None,
                             agency_identifier='028', beginning_period_of_availa='2010', line=1009,
                             ending_period_of_availabil='2011', availability_type_code=None,
                             main_account_code='0406', sub_account_code='000', period=6, fiscal_year=2001)
        gtas5 = SF133Factory(tas=concat_tas_dict(tas5.component_dict()), allocation_transfer_agency='069',
                             agency_identifier='013', beginning_period_of_availa=None, line=1009,
                             ending_period_of_availabil=None, availability_type_code='X',
                             main_account_code='2050', sub_account_code='005', period=6, fiscal_year=2001)
        gtas6 = SF133Factory(tas=concat_tas_dict(tas6.component_dict()), allocation_transfer_agency='028',
                             agency_identifier='028', beginning_period_of_availa=None, line=1009,
                             ending_period_of_availabil=None, availability_type_code='X',
                             main_account_code='8007', sub_account_code='000', period=6, fiscal_year=2001)
        gtas7 = SF133Factory(tas=concat_tas_dict(tas7.component_dict()), allocation_transfer_agency=None,
                             agency_identifier='049', beginning_period_of_availa=None, line=1009,
                             ending_period_of_availabil=None, availability_type_code='X',
                             main_account_code='0100', sub_account_code='000', period=6, fiscal_year=2001)
        gtas8 = SF133Factory(tas=concat_tas_dict(tas8.component_dict()), allocation_transfer_agency=None,
                             agency_identifier='049', beginning_period_of_availa='2010', line=1009,
                             ending_period_of_availabil='2011', availability_type_code=None,
                             main_account_code='0100', sub_account_code='000', period=6, fiscal_year=2001)
        gtas9 = SF133Factory(tas=concat_tas_dict(tas9.component_dict()), allocation_transfer_agency=None,
                             agency_identifier='049', beginning_period_of_availa='2014', line=1009,
                             ending_period_of_availabil='2015', availability_type_code=None,
                             main_account_code='0100', sub_account_code='000', period=6, fiscal_year=2001)
        gtas10 = SF133Factory(tas=concat_tas_dict(tas10.component_dict()), allocation_transfer_agency=None,
                              agency_identifier='049', beginning_period_of_availa='2015', line=1009,
                              ending_period_of_availabil='2016', availability_type_code=None,
                              main_account_code='0100', sub_account_code='000', period=6, fiscal_year=2001)
        sess.add_all([gtas1, gtas2, gtas3, gtas4, gtas5, gtas6, gtas7, gtas8, gtas9, gtas10])
        sess.commit()
def validate():
    """Kick off validation for the job named in the request body."""
    if request.is_json:
        # Stash the job ID on the request context for downstream use.
        g.job_id = request.json.get('job_id')
    manager = ValidationManager(local, error_report_path)
    return manager.validate_job(request)
def run_app():
    """Run the application.

    Polls SQS forever; each message body is a job ID that is validated by a
    ValidationManager. Errors are recorded in the error DB and job statuses
    updated; processed messages are deleted, and remaining messages have
    their visibility reset so they can be retried immediately.
    """
    app = Flask(__name__)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        # BUG FIX: initialize before the loop so the `finally` block cannot
        # hit an UnboundLocalError if receive_messages() raises on the very
        # first iteration (messages was previously only assigned inside try).
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # BUG FIX: reset every iteration so the error handlers never act
            # on (e.g. delete) a message left over from a previous pass.
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10)
                for message in messages:
                    logger.info("Message received: %s", message.body)
                    current_message = message
                    GlobalDB.db()
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    validation_manager = ValidationManager(local, error_report_path)
                    validation_manager.validate_job(g.job_id)

                    # delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(str(e))

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # job pass prerequisites for validation, but an error
                        # happened somewhere. mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                if current_message:
                    # These errors are permanent for this file; retrying the
                    # message would not help, so drop it from the queue.
                    if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                       ValidationError.fileTypeError]:
                        current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(str(e))

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    # NOTE(review): already-deleted messages raise a botocore
                    # ClientError here; consider guarding this call the way the
                    # newer run_app variant does — confirm before changing.
                    message.change_visibility(VisibilityTimeout=0)