def job_context(job_id, is_local=True):
    """Common context for files D1, D2, E, and F generation. Handles marking the job finished and/or failed"""
    # Flask context ensures we have access to flask.g
    with Flask(__name__).app_context():
        sess = GlobalDB.db().session
        try:
            yield sess
            logger.info({
                'message': 'Marking job {} as finished'.format(job_id),
                'message_type': 'BrokerInfo',
                'job_id': job_id
            })
            mark_job_status(job_id, "finished")
        except Exception as e:
            # logger.exception() automatically adds traceback info
            logger.exception({
                'message': 'Marking job {} as failed'.format(job_id),
                'message_type': 'BrokerException',
                'job_id': job_id,
                'exception': str(e)
            })
            job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
            if job:
                # mark job as failed
                job.error_message = str(e)
                sess.commit()
                mark_job_status(job_id, "failed")

                # ensure FileRequest from failed job is not cached
                file_request = sess.query(FileRequest).filter_by(
                    job_id=job_id).one_or_none()
                if file_request and file_request.is_cached_file:
                    file_request.is_cached_file = False
                    sess.commit()
        finally:
            file_request = sess.query(FileRequest).filter_by(
                job_id=job_id).one_or_none()
            if file_request and file_request.is_cached_file:
                # copy job data to all child FileRequests
                child_requests = sess.query(FileRequest).filter_by(
                    parent_job_id=job_id).all()
                file_type = FILE_TYPE_DICT_LETTER[
                    file_request.job.file_type_id]
                for child in child_requests:
                    copy_parent_file_request_data(sess, child.job,
                                                  file_request.job, file_type,
                                                  is_local)

            GlobalDB.close()
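
A minimal usage sketch, assuming job_context is decorated with contextlib.contextmanager (its yield-based body implies it); the job_id 1234 is a placeholder:

with job_context(1234, is_local=True) as sess:
    # do the file generation work with the yielded session; on a normal
    # exit the job is marked "finished", on an exception "failed"
    sess.query(Job).filter_by(job_id=1234).one()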
def full_database_setup():
    """Sets up a clean database based on the model metadata. It also
    calculates the FK relationships between tables so we can delete them in
    order. It yields a tuple the _DB and ordered list of tables."""
    rand_id = str(randint(1, 9999))

    config = dataactcore.config.CONFIG_DB
    config['db_name'] = 'unittest{}_data_broker'.format(rand_id)
    dataactcore.config.CONFIG_DB = config

    create_database(config['db_name'])
    db = GlobalDB.db()
    run_migrations()

    creation_order = baseModel.Base.metadata.sorted_tables
    yield (db, list(reversed(creation_order)))  # drop order

    GlobalDB.close()
    drop_database(config['db_name'])
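
A hedged sketch of consuming this generator as a yield-based pytest fixture (the fixture registration itself is an assumption); rows are deleted in the yielded drop order so FK constraints are respected:

def test_with_clean_db(full_database_setup):
    db, drop_order = full_database_setup
    # child tables come first in drop_order, so deletes never violate FKs
    for table in drop_order:
        db.session.execute(table.delete())
    db.session.commit()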
def job_context(job_id):
    """Common context for file E and F generation. Handles marking the job
    finished and/or failed"""
    # Flask context ensures we have access to flask.g
    with Flask(__name__).app_context():
        sess = GlobalDB.db().session
        try:
            yield sess
            logger.debug('Marking job as finished')
            mark_job_status(job_id, "finished")
        except Exception as e:
            # logger.exception() automatically adds traceback info
            logger.exception('Job %s failed', job_id)
            job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
            if job:
                job.error_message = str(e)
                sess.commit()
                mark_job_status(job_id, "failed")
        finally:
            GlobalDB.close()
Example #5
def teardown_appcontext(exception):
    GlobalDB.close()
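
This hook only takes effect once it is registered on the Flask app; a minimal registration sketch (the app variable is an assumption):

app = Flask(__name__)
app.teardown_appcontext(teardown_appcontext)  # close the DB whenever an app context ends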
def tearDownClass(cls):
    """Tear down class-level resources."""
    GlobalDB.close()
    drop_database(CONFIG_DB['db_name'])
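
For context, a hedged sketch of where this hook would live; the TestCase class and its setUpClass counterpart are assumptions that mirror the full_database_setup example above:

class DatabaseTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # assumed counterpart: create the test database before any test runs
        create_database(CONFIG_DB['db_name'])

    @classmethod
    def tearDownClass(cls):
        GlobalDB.close()
        drop_database(CONFIG_DB['db_name'])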
Example #7
def job_context(job_id, is_local=True):
    """ Common context for files D1, D2, E, and F generation. Handles marking the job finished and/or failed

        Args:
            job_id: the ID of the submission job
            is_local: a boolean indicating whether this is being run in a local environment or not

        Yields:
            The current DB session
    """
    # Flask context ensures we have access to flask.g
    with Flask(__name__).app_context():
        sess, job = retrieve_job_context_data(job_id)
        try:
            yield sess, job
            if not job.from_cached:
                # only mark completed jobs as done
                logger.info({
                    'message': 'Marking job {} as finished'.format(job.job_id),
                    'job_id': job.job_id,
                    'message_type': 'ValidatorInfo'
                })
                mark_job_status(job.job_id, "finished")
        except Exception as e:
            # logger.exception() automatically adds traceback info
            logger.exception({
                'message': 'Marking job {} as failed'.format(job.job_id),
                'job_id': job.job_id,
                'message_type': 'ValidatorException',
                'exception': str(e)
            })

            # mark job as failed
            job.error_message = str(e)
            mark_job_status(job.job_id, "failed")

            # ensure FileRequest from failed job is not cached
            file_request = sess.query(FileRequest).filter_by(
                job_id=job.job_id).one_or_none()
            if file_request and file_request.is_cached_file:
                file_request.is_cached_file = False

            sess.commit()

        finally:
            file_request = sess.query(FileRequest).filter_by(
                job_id=job.job_id).one_or_none()
            if file_request and file_request.is_cached_file:
                # copy job data to all child FileRequests
                child_requests = sess.query(FileRequest).filter_by(
                    parent_job_id=job.job_id).all()
                if child_requests:
                    logger.info({
                        'message': 'Copying file data from job {} to its children'.format(job.job_id),
                        'message_type': 'ValidatorInfo',
                        'job_id': job.job_id
                    })
                    for child in child_requests:
                        copy_parent_file_request_data(sess, child.job, job,
                                                      is_local)
            GlobalDB.close()
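
Unlike the earlier variant, this version yields a (session, job) tuple, so callers unpack both; a short sketch under the same contextlib.contextmanager assumption (1234 is again a placeholder job_id):

with job_context(1234, is_local=False) as (sess, job):
    # the Job row is already loaded by retrieve_job_context_data
    logger.info('Generating file for job %s', job.job_id)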
Example #8
def run_app():
    """Run the application."""
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app,
                        tracer,
                        service="broker-dd",
                        distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(
                    WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(
                        job_id=g.job_id).one_or_none()
                    if job is None:
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException(
                            'Job ID {} not found in database'.format(g.job_id),
                            StatusCode.CLIENT_ERROR, None,
                            validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name in ['A', 'B', 'C', 'FABS']
                            or job.job_type.name != 'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(
                            local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(
                            job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()

            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType,
                                         e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere: mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [
                                    ValidationError.rowCountError,
                                    ValidationError.headerError,
                                    ValidationError.fileTypeError
                            ]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, (ValueError, csv.Error, UnicodeDecodeError)):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass
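
The polling loop above expects the job_id in the message body and optional agency data in message attributes; a hedged sketch of the matching producer side, assuming sqs_queue() returns a boto3 Queue resource (the attribute values are placeholders):

queue = sqs_queue()
queue.send_message(
    MessageBody=str(job_id),  # read back above as message.body
    MessageAttributes={
        'agency_code': {'DataType': 'String', 'StringValue': '020'},
        'agency_type': {'DataType': 'String', 'StringValue': 'awarding'}
    })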
Example #9
def run_app():
    """Run the application."""
    app = Flask(__name__)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        current_message = None
        messages = []
        while True:
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10)
                for message in messages:
                    logger.info("Message received: %s", message.body)
                    current_message = message
                    GlobalDB.db()
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")
                    validation_manager = ValidationManager(local, error_report_path)
                    validation_manager.validate_job(g.job_id)

                    # delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(str(e))

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # job passed prerequisites for validation, but an error
                        # happened somewhere; mark the job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(str(e))

                # csv-specific errors get a different job status and response code
                if isinstance(e, (ValueError, csv.Error, UnicodeDecodeError)):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # deleted messages raise an error here, which is fine
                        pass