def test_generate_new_d1_file_success(monkeypatch, mock_broker_config_paths, database):
    """A first-time D1 generation run produces a fresh file and records a cached FileRequest for it."""
    session = database.session

    upload_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(mock_broker_config_paths['d_file_storage_path'].join('original')),
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=True,
    )
    session.add(upload_job)
    session.commit()

    generator = FileGenerationManager(upload_job, '123', 'awarding', CONFIG_BROKER['local'])
    generator.generate_from_job()
    session.refresh(upload_job)

    request_row = session.query(FileRequest).filter(FileRequest.job_id == upload_job.job_id).one_or_none()

    # A FileRequest mirroring the job's parameters must have been cached by the run.
    assert request_row is not None
    assert request_row.is_cached_file is True
    assert request_row.start_date == upload_job.start_date
    assert request_row.end_date == upload_job.end_date
    assert request_row.agency_code == '123'
    assert request_row.request_date == datetime.now().date()

    # The job itself now points at a newly generated (not cached) file and has finished.
    assert upload_job.original_filename != 'original'
    assert upload_job.from_cached is False
    assert upload_job.job_status_id == JOB_STATUS_DICT['finished']
def test_generate_new_d1_file_keep_old_job_files_success(monkeypatch, mock_broker_config_paths, database):
    """Regenerating a parent job's file must not disturb the file already attached to its child job."""
    session = database.session

    shared_filename = str(mock_broker_config_paths['d_file_storage_path'].join('original'))
    parent_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=shared_filename,
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=False,
    )
    child_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['finished'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=shared_filename,
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=False,
    )
    session.add_all([parent_job, child_job])
    session.commit()

    # Link the two jobs through FileRequests: the child request points back at the parent job.
    parent_request = FileRequestFactory(
        job=parent_job, parent_job_id=None, is_cached_file=False, agency_code='123',
        agency_type='awarding', start_date=parent_job.start_date, end_date=parent_job.end_date,
        file_type='D1', request_date=datetime.now().date())
    child_request = FileRequestFactory(
        job=child_job, parent_job_id=parent_job.job_id, is_cached_file=False, agency_code='123',
        agency_type='awarding', start_date=child_job.start_date, end_date=child_job.end_date,
        file_type='D1', request_date=datetime.now().date())
    session.add_all([parent_request, child_request])
    session.commit()

    generator = FileGenerationManager(parent_job, '123', 'awarding', CONFIG_BROKER['local'])
    generator.generate_from_job()

    session.refresh(parent_job)
    session.refresh(child_job)

    # Only the parent job's file changed; the child job keeps its original file.
    assert parent_job.original_filename != 'original'
    assert child_job.original_filename == 'original'
def test_generate_new_d1_file_different_dates_success(monkeypatch, mock_broker_config_paths, database):
    """Changing a job's date range forces a new cached D1 request while the stale one is kept uncached."""
    session = database.session

    upload_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(mock_broker_config_paths['d_file_storage_path'].join('original')),
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=True,
    )
    session.add(upload_job)
    session.commit()

    cached_request = FileRequestFactory(
        job=upload_job, is_cached_file=True, agency_code='123', agency_type='awarding',
        start_date=upload_job.start_date, end_date=upload_job.end_date, file_type='D1',
        request_date=datetime.now().date())
    session.add(cached_request)
    session.commit()

    # Shift the job's start date so the existing cached request no longer matches.
    previous_start = upload_job.start_date
    upload_job.start_date = '01/02/2017'
    session.commit()

    generator = FileGenerationManager(upload_job, '123', 'awarding', CONFIG_BROKER['local'])
    generator.generate_from_job()
    session.refresh(upload_job)

    # A fresh cached request exists with the job's new dates.
    fresh_request = session.query(FileRequest).filter(
        FileRequest.job_id == upload_job.job_id, FileRequest.is_cached_file.is_(True)).one_or_none()
    assert fresh_request is not None
    assert fresh_request.is_cached_file is True
    assert fresh_request.start_date == upload_job.start_date
    assert fresh_request.end_date == upload_job.end_date
    assert fresh_request.agency_code == '123'
    assert fresh_request.agency_type == 'awarding'

    # The old request was demoted to non-cached but still carries the original start date.
    stale_request = session.query(FileRequest).filter(
        FileRequest.job_id == upload_job.job_id, FileRequest.is_cached_file.is_(False)).one_or_none()
    assert stale_request is not None
    assert stale_request.is_cached_file is False
    assert stale_request.start_date == previous_start
    assert stale_request.end_date == upload_job.end_date
    assert stale_request.agency_code == '123'
    assert stale_request.agency_type == 'awarding'
def test_uncache_new_d1_file_fpds_success(monkeypatch, mock_broker_config_paths, database):
    """A cached D1 request from a previous day (stale FPDS data) is not reused: a new file is generated
    and a fresh cached request is recorded for the requesting job."""
    session = database.session

    cached_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['finished'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(mock_broker_config_paths['d_file_storage_path'].join('original')),
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=True,
    )
    session.add(cached_job)
    session.commit()

    # Note the request_date: yesterday, so the cache is stale for today's run.
    stale_request = FileRequestFactory(
        job=cached_job, is_cached_file=True, agency_code='123', agency_type='awarding',
        start_date='01/01/2017', end_date='01/31/2017', file_type='D1',
        request_date=(datetime.now().date() - timedelta(1)))
    requesting_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        start_date='01/01/2017',
        end_date='01/31/2017',
    )
    session.add_all([stale_request, requesting_job])
    session.commit()

    generator = FileGenerationManager(requesting_job, '123', 'awarding', CONFIG_BROKER['local'])
    generator.generate_from_job()
    session.refresh(requesting_job)

    # A new cached request was created for the requesting job, dated today.
    fresh_request = session.query(FileRequest).filter(
        FileRequest.job_id == requesting_job.job_id).one_or_none()
    assert fresh_request is not None
    assert fresh_request.is_cached_file is True
    assert fresh_request.start_date == requesting_job.start_date
    assert fresh_request.end_date == requesting_job.end_date
    assert fresh_request.agency_code == '123'
    assert fresh_request.request_date == datetime.now().date()

    # The job received a newly generated file rather than the stale cached one.
    assert requesting_job.original_filename != 'original'
    assert requesting_job.from_cached is False
    assert requesting_job.job_status_id == JOB_STATUS_DICT['finished']
def test_generate_noncached_d2_file_success(monkeypatch, mock_broker_config_paths, database):
    """Cached requests that each differ from the new job in exactly one dimension (agency code,
    start date, or end date) must not be reused: a brand-new file is generated instead."""
    session = database.session

    storage = mock_broker_config_paths['d_file_storage_path']
    diff_agency_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'], job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(storage.join('diff_agency')),
        start_date='01/01/2017', end_date='01/31/2017',
        original_filename='diff_agency', from_cached=False)
    diff_start_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'], job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award'],
        filename=str(storage.join('diff_start_date')),
        start_date='01/02/2017', end_date='01/31/2017',
        original_filename='diff_start_date', from_cached=False)
    diff_end_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'], job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(storage.join('diff_end_date')),
        start_date='01/01/2017', end_date='01/30/2017',
        original_filename='diff_end_date', from_cached=False)
    session.add_all([diff_agency_job, diff_start_job, diff_end_job])
    session.commit()

    # Three cached requests, each a near-miss for the job under test.
    request_diff_agency = FileRequestFactory(
        job=diff_agency_job, is_cached_file=True, agency_code='124', agency_type='awarding',
        start_date='01/01/2017', end_date='01/31/2017', file_type='D2',
        request_date=datetime.now().date())
    request_diff_start = FileRequestFactory(
        job=diff_agency_job, is_cached_file=True, agency_code='123', agency_type='awarding',
        start_date='01/02/2017', end_date='01/31/2017', file_type='D2',
        request_date=datetime.now().date())
    request_diff_end = FileRequestFactory(
        job=diff_agency_job, is_cached_file=True, agency_code='123', agency_type='awarding',
        start_date='01/01/2017', end_date='01/30/2017', file_type='D2',
        request_date=datetime.now().date())
    new_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'], job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        start_date='01/01/2017', end_date='01/31/2017')
    session.add_all([new_job, request_diff_agency, request_diff_start, request_diff_end])
    session.commit()

    generator = FileGenerationManager(new_job, '123', 'awarding', CONFIG_BROKER['local'])
    generator.generate_from_job()
    session.refresh(new_job)

    # A fresh cached request was written for the new job.
    fresh_request = session.query(FileRequest).filter(FileRequest.job_id == new_job.job_id).one_or_none()
    assert fresh_request is not None
    assert fresh_request.is_cached_file is True
    assert fresh_request.start_date == new_job.start_date
    assert fresh_request.end_date == new_job.end_date
    assert fresh_request.agency_code == '123'
    assert fresh_request.request_date == datetime.now().date()

    # None of the near-miss files were reused, and the job finished with a generated file.
    assert new_job.original_filename != diff_agency_job.original_filename
    assert new_job.original_filename != diff_start_job.original_filename
    assert new_job.original_filename != diff_end_job.original_filename
    assert new_job.from_cached is False
    assert new_job.job_status_id == JOB_STATUS_DICT['finished']
def run_app():
    """Run the application.

    Creates the Flask app, then enters an infinite SQS polling loop. Each message body is a
    job_id; depending on the job's type the loop either runs validations or generates a file.
    ResponseExceptions and unexpected exceptions are logged and recorded against the job, and
    the `finally` block resets message visibility so unprocessed messages can be retried
    immediately.
    """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app, tracer, service="broker-dd", distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        # Initialized before the loop so the `finally` block can safely iterate it even if
        # receive_messages raised before assigning a fresh batch.
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(job_id=g.job_id).one_or_none()
                    if job is None:
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException('Job ID {} not found in database'.format(g.job_id),
                                                StatusCode.CLIENT_ERROR, None, validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name in ['A', 'B', 'C', 'FABS'] or
                            job.job_type.name != 'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere:
                        # mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        # Structural/file-shape errors won't succeed on retry, so drop the message.
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass