Example #1
def start_e_f_generation(job):
    """ Passes the Job ID for an E or F generation Job to SQS

        Args:
            job: File generation job to start
    """
    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'submission_id': job.submission_id, 'job_id': job.job_id,
                'file_type': file_type}
    logger.info(log_data)

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes={})

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
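
# Every example in this file obtains its queue from a sqs_queue() helper. A minimal
# sketch of what such a helper might look like, assuming boto3 and a hypothetical
# queue name pulled from configuration (the real Broker helper may differ):
import boto3

def sqs_queue(queue_name='broker-job-queue', region_name='us-east-1'):
    """ Return a boto3 SQS Queue resource for the job queue. """
    sqs = boto3.resource('sqs', region_name=region_name)
    return sqs.get_queue_by_name(QueueName=queue_name)
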
def start_a_generation(job, start_date, end_date, agency_code):
    """ Validates the start and end dates of the generation and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_code: Agency code for A file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type}
    logger.info(log_data)

    # Set SQS message attributes
    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}}

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
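
# The date guard above relies on StringCleaner.is_date. A rough stand-in, assuming
# MM/DD/YYYY strings as the error message describes (the real implementation may
# accept additional formats):
from datetime import datetime

def is_date(value):
    """ Return True if value parses as an MM/DD/YYYY date. """
    try:
        datetime.strptime(value, '%m/%d/%Y')
        return True
    except (ValueError, TypeError):
        return False
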
def check_job_dependencies(job_id):
    """ For specified job, check which of its dependencies are ready to be started and add them to the queue

        Args:
            job_id: the ID of the job that was just finished

        Raises:
            ValueError: If the job provided is not finished
    """
    sess = GlobalDB.db().session
    log_data = {'message_type': 'CoreError', 'job_id': job_id}

    # raise exception if current job is not actually finished
    job = sess.query(Job).filter(Job.job_id == job_id).one()
    if job.job_status_id != JOB_STATUS_DICT['finished']:
        log_data['message'] = 'Current job not finished, unable to check dependencies'
        logger.error(log_data)
        raise ValueError('Current job not finished, unable to check dependencies')

    # get the jobs that are dependent on job_id being finished
    dependencies = sess.query(JobDependency).filter_by(prerequisite_id=job_id).all()
    for dependency in dependencies:
        dep_job_id = dependency.job_id
        if dependency.dependent_job.job_status_id != JOB_STATUS_DICT['waiting']:
            log_data['message_type'] = 'CoreError'
            log_data['message'] = "{} (dependency of {}) is not in a 'waiting' state".format(dep_job_id, job_id)
            logger.error(log_data)
        else:
            # find the number of this job's prerequisites that do not have a status of 'finished' or have errors.
            unfinished_prerequisites = sess.query(JobDependency).\
                join(Job, JobDependency.prerequisite_job).\
                filter(or_(Job.job_status_id != JOB_STATUS_DICT['finished'], Job.number_of_errors > 0),
                       JobDependency.job_id == dep_job_id).\
                count()
            if unfinished_prerequisites == 0:
                # this job has no unfinished prerequisite jobs, so it is eligible to be set to a 'ready' status and
                # added to the queue
                mark_job_status(dep_job_id, 'ready')

                # Only want to send validation jobs to the queue, other job types should be forwarded
                if dependency.dependent_job.job_type_name in ['csv_record_validation', 'validation']:
                    # add dep_job_id to the SQS job queue
                    log_data['message_type'] = 'CoreInfo'
                    log_data['message'] = 'Sending job {} to job manager in sqs'.format(dep_job_id)
                    logger.info(log_data)
                    queue = sqs_queue()
                    response = queue.send_message(MessageBody=str(dep_job_id))
                    log_data['message'] = 'Send message response: {}'.format(response)
                    logger.info(log_data)
Example #6
def test_push_poll_queue(self):
    """ Adds a single message to the queue, then retrieves it immediately. The default
        number of messages retrieved is 1. """
    queue = sqs_queue()
    response = queue.send_message(MessageBody="1234")
    self.assertEqual(response['ResponseMetadata']['HTTPStatusCode'], 200)
    messages = queue.receive_messages(WaitTimeSeconds=10)
    self.assertNotEqual(messages, [])
    for message in messages:
        message.delete()
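
# The test above assumes a live queue behind sqs_queue(). One way to run it
# hermetically is the moto library; a sketch assuming moto 5.x's mock_aws
# decorator (earlier moto releases exposed per-service mocks such as mock_sqs):
import boto3
from moto import mock_aws

@mock_aws
def test_push_poll_round_trip():
    sqs = boto3.resource('sqs', region_name='us-east-1')
    queue = sqs.create_queue(QueueName='test-queue')  # hypothetical queue name
    queue.send_message(MessageBody='1234')
    messages = queue.receive_messages(WaitTimeSeconds=0)
    assert messages and messages[0].body == '1234'
    messages[0].delete()
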
def check_job_dependencies(job_id):
    """ For specified job, check which of its dependencies are ready to be started and add them to the queue

        Args:
            job_id: the ID of the job that was just finished

        Raises:
            ValueError: If the job provided is not finished
    """
    sess = GlobalDB.db().session
    log_data = {
        'message_type': 'CoreError',
        'job_id': job_id
    }

    # raise exception if current job is not actually finished
    job = sess.query(Job).filter(Job.job_id == job_id).one()
    if job.job_status_id != JOB_STATUS_DICT['finished']:
        log_data['message'] = 'Current job not finished, unable to check dependencies'
        logger.error(log_data)
        raise ValueError('Current job not finished, unable to check dependencies')

    # get the jobs that are dependent on job_id being finished
    dependencies = sess.query(JobDependency).filter_by(prerequisite_id=job_id).all()
    for dependency in dependencies:
        dep_job_id = dependency.job_id
        if dependency.dependent_job.job_status_id != JOB_STATUS_DICT['waiting']:
            log_data['message_type'] = 'CoreError'
            log_data['message'] = "{} (dependency of {}) is not in a 'waiting' state".format(dep_job_id, job_id)
            logger.error(log_data)
        else:
            # find the number of this job's prerequisites that do not have a status of 'finished' or have errors.
            unfinished_prerequisites = sess.query(JobDependency).\
                join(Job, JobDependency.prerequisite_job).\
                filter(or_(Job.job_status_id != JOB_STATUS_DICT['finished'], Job.number_of_errors > 0),
                       JobDependency.job_id == dep_job_id).\
                count()
            if unfinished_prerequisites == 0:
                # this job has no unfinished prerequisite jobs, so it is eligible to be set to a 'ready' status and
                # added to the queue
                mark_job_status(dep_job_id, 'ready')

                # Only want to send validation jobs to the queue, other job types should be forwarded
                if dependency.dependent_job.job_type_name in ['csv_record_validation', 'validation']:
                    # add dep_job_id to the SQS job queue
                    log_data['message_type'] = 'CoreInfo'
                    log_data['message'] = 'Sending job {} to job manager in sqs'.format(dep_job_id)
                    logger.info(log_data)
                    queue = sqs_queue()
                    response = queue.send_message(MessageBody=str(dep_job_id))
                    log_data['message'] = 'Send message response: {}'.format(response)
                    logger.info(log_data)
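
# The gating rule above, distilled: a dependent job becomes 'ready' only when every
# prerequisite is finished with zero errors. A self-contained sketch with plain dicts
# standing in for the ORM rows (all names here are illustrative):
def ready_dependents(dependencies, jobs):
    """ Return dependent job ids whose prerequisites are all finished and error-free. """
    return [dep_id for dep_id, prereq_ids in dependencies.items()
            if all(jobs[p]['status'] == 'finished' and jobs[p]['errors'] == 0
                   for p in prereq_ids)]

jobs = {1: {'status': 'finished', 'errors': 0}, 2: {'status': 'running', 'errors': 0}}
dependencies = {10: [1], 11: [1, 2]}
assert ready_dependents(dependencies, jobs) == [10]  # job 11 still waits on job 2
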
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if
        it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations

        Returns:
            SQS send_message response
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, "waiting")

    log_data = {'message': 'Sending {} file generation job {} to SQS'.format(job.file_type.letter_name, job.job_id),
                'message_type': 'BrokerInfo', 'submission_id': job.submission_id, 'job_id': job.job_id,
                'file_type': job.file_type.letter_name}
    logger.info(log_data)

    file_request = retrieve_cached_file_request(job, agency_type, agency_code, g.is_local)
    if file_request:
        log_data['message'] = 'No new file generated, used FileRequest with ID {}'.format(file_request.file_request_id)
        logger.info(log_data)
    else:
        # Set SQS message attributes
        message_attr = {'agency_type': {'DataType': 'String', 'StringValue': agency_type}}
        if not job.submission_id:
            message_attr['agency_code'] = {'DataType': 'String', 'StringValue': agency_code}

        # Add job_id to the SQS job queue
        queue = sqs_queue()
        msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

        log_data['message'] = 'SQS message response: {}'.format(msg_response)
        logger.debug(log_data)
def run_app():
    """ Run the application. """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app,
                        tracer,
                        service="broker-dd",
                        distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        while True:
            # Grabs one (or more) messages from the queue
            messages = queue.receive_messages(WaitTimeSeconds=10,
                                              MessageAttributeNames=['All'])
            for message in messages:
                logger.info("Message received: %s", message.body)

                msg_attr = message.message_attributes
                if msg_attr and msg_attr.get('validation_type', {}).get('StringValue') == 'generation':
                    # Generating a file
                    validator_process_file_generation(message.body)
                else:
                    # Running validations (or generating a file from a Job)
                    a_agency_code = msg_attr.get('agency_code', {}).get('StringValue') if msg_attr else None
                    validator_process_job(message.body, a_agency_code)

                # Delete from SQS once processed
                message.delete()

            # When you receive an empty response from the queue, wait a second before trying again
            if len(messages) == 0:
                time.sleep(1)
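
# The loop above is the standard SQS long-poll pattern: receive with a wait time,
# process, delete only after success, and back off briefly on an empty response.
# Its skeleton, with queue and handler as stand-ins:
import time

def poll_forever(queue, handler):
    """ Long-poll a queue, deleting each message only after it is handled. """
    while True:
        messages = queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All'])
        for message in messages:
            handler(message.body, message.message_attributes)
            message.delete()  # safe: the handler completed without raising
        if not messages:
            time.sleep(1)  # empty response; pause before polling again
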
def start_e_f_generation(job):
    """ Passes the Job ID for an E or F generation Job to SQS

        Args:
            job: File generation job to start
    """
    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'submission_id': job.submission_id, 'job_id': job.job_id,
                'file_type': file_type}
    logger.info(log_data)

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes={})

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
def check_job_dependencies(job_id):
    """
    For the specified job, check which of its dependencies are ready to be started
    and add them to the queue
    """
    sess = GlobalDB.db().session

    # raise exception if current job is not actually finished
    job = sess.query(Job).filter(Job.job_id == job_id).one()
    if job.job_status_id != JOB_STATUS_DICT['finished']:
        raise ValueError('Current job not finished, unable to check dependencies')

    # get the jobs that are dependent on job_id being finished
    dependencies = sess.query(JobDependency).filter_by(prerequisite_id=job_id).all()
    for dependency in dependencies:
        dep_job_id = dependency.job_id
        if dependency.dependent_job.job_status_id != JOB_STATUS_DICT['waiting']:
            logger.error("%s (dependency of %s) is not in a 'waiting' state",
                         dep_job_id, job_id)
        else:
            # find the number of this job's prerequisites that do
            # not have a status of 'finished'.
            unfinished_prerequisites = sess.query(JobDependency).\
                join(Job, JobDependency.prerequisite_job).\
                filter(
                    Job.job_status_id != JOB_STATUS_DICT['finished'],
                    JobDependency.job_id == dep_job_id).\
                count()
            if unfinished_prerequisites == 0:
                # this job has no unfinished prerequisite jobs,
                # so it is eligible to be set to a 'ready'
                # status and added to the queue
                mark_job_status(dep_job_id, 'ready')

                # Only want to send validation jobs to the queue, other job types should be forwarded
                if dependency.dependent_job.job_type_name in ['csv_record_validation', 'validation']:
                    # add dep_job_id to the SQS job queue
                    logger.info('Sending job %s to job manager in sqs', dep_job_id)
                    queue = sqs_queue()
                    response = queue.send_message(MessageBody=str(dep_job_id))
                    logger.info('Send message response: %s', response)
示例#12
0
def start_generation_job(job, start_date, end_date, agency_code=None):
    """ Validates the dates for a D file generation job and passes the Job ID to SQS

        Args:
            job: File generation job to start
            start_date: Start date of the file generation
            end_date: End date of the file generation
            agency_code: Agency code for detached D file generations

        Returns:
            Tuple of boolean indicating successful start, and error response if False
    """
    sess = GlobalDB.db().session
    file_type = job.file_type.letter_name
    try:
        if file_type in ['D1', 'D2']:
            # Validate and set Job's start and end dates
            if not (StringCleaner.is_date(start_date)
                    and StringCleaner.is_date(end_date)):
                raise ResponseException(
                    "Start or end date cannot be parsed into a date",
                    StatusCode.CLIENT_ERROR)
            job.start_date = start_date
            job.end_date = end_date
            sess.commit()
        elif file_type not in ["E", "F"]:
            raise ResponseException("File type must be either D1, D2, E or F",
                                    StatusCode.CLIENT_ERROR)

    except ResponseException as e:
        return False, JsonResponse.error(e,
                                         e.status,
                                         file_type=file_type,
                                         status='failed')

    mark_job_status(job.job_id, "waiting")

    # Add job_id to the SQS job queue
    logger.info({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Sending file generation job {} to Validator in SQS'.format(job.job_id)
    })
    queue = sqs_queue()

    message_attr = {
        'agency_code': {
            'DataType': 'String',
            'StringValue': agency_code
        }
    } if agency_code else {}
    response = queue.send_message(MessageBody=str(job.job_id),
                                  MessageAttributes=message_attr)
    logger.debug({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Send message response: {}'.format(response)
    })

    return True, None
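
# start_generation_job reports failure as a (False, error_response) pair rather than
# raising. A hypothetical caller, e.g. a route handler, would branch on the flag
# (route_response_for is illustrative, not part of the Broker API):
def route_response_for(job, start_date, end_date, agency_code=None):
    """ Kick off generation and surface the helper's error response, if any. """
    success, error_response = start_generation_job(job, start_date, end_date, agency_code)
    if not success:
        return error_response  # JsonResponse built inside the helper
    return {'status': 'waiting', 'job_id': job.job_id}
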
Example #13
def run_app():
    """Run the application."""
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app,
                        tracer,
                        service="broker-dd",
                        distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(
                    WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(job_id=g.job_id).one_or_none()
                    if job is None:
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException(
                            'Job ID {} not found in database'.format(g.job_id),
                            StatusCode.CLIENT_ERROR, None,
                            validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name in ['A', 'B', 'C', 'FABS']
                            or job.job_type.name != 'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()

            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType,
                                         e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere: mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass
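
# The finally block above releases unprocessed messages by zeroing their visibility
# timeout, so another worker can pick them up immediately. The same idea in isolation
# (messages stands in for a list of boto3 SQS Message objects):
from botocore.exceptions import ClientError

def release_messages(messages):
    """ Make unhandled messages immediately receivable again. """
    for message in messages:
        try:
            message.change_visibility(VisibilityTimeout=0)
        except ClientError:
            pass  # already-deleted messages raise here; that is expected
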
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if
        it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(
            request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date,
            file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type, is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id, 'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
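
# The branch above is a cache-or-create pattern: reuse a matching prior FileGeneration,
# otherwise insert a new cached row and enqueue the work. The shape of that decision,
# with a plain dict standing in for the cache table:
def get_or_create_generation(cache, key, enqueue):
    """ Return the cached generation for key, or create one and enqueue it. """
    generation = cache.get(key)
    if generation is None:
        generation = {'key': key, 'is_cached_file': True}
        cache[key] = generation
        enqueue(generation)
    return generation

cache, sent = {}, []
get_or_create_generation(cache, ('D1', '020', 'awarding'), sent.append)
get_or_create_generation(cache, ('D1', '020', 'awarding'), sent.append)
assert len(sent) == 1  # the second call hit the cache and enqueued nothing
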
Example #15
def run_app():
    """ Run the application. """
    app = create_app()

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        keep_polling = True
        while keep_polling:

            # Start a Datadog Trace for this poll iter to capture activity in APM
            with tracer.trace(
                name="job.{}".format(JOB_TYPE),
                service=JOB_TYPE.lower(),
                resource=queue.url,
                span_type=SpanTypes.WORKER
            ) as span:

                # With cleanup handling engaged, allowing retries
                dispatcher = SQSWorkDispatcher(queue)

                def choose_job_by_message_attributes(message):
                    # Determine if this is a retry of this message, in which case job execution should know so it can
                    # do cleanup before proceeding with the job
                    q_msg_attr = message.attributes  # the non-user-defined (queue-defined) attributes on the message
                    is_retry = False
                    if q_msg_attr.get('ApproximateReceiveCount') is not None:
                        is_retry = int(q_msg_attr.get('ApproximateReceiveCount')) > 1

                    msg_attr = message.message_attributes
                    if msg_attr and msg_attr.get('validation_type', {}).get('StringValue') == 'generation':
                        # Generating a file
                        job_signature = {"_job": validator_process_file_generation,
                                         "file_gen_id": message.body,
                                         "is_retry": is_retry}
                    else:
                        # Running validations (or generating a file from a Job)
                        a_agency_code = msg_attr.get('agency_code', {}).get('StringValue') if msg_attr else None
                        job_signature = {"_job": validator_process_job,
                                         "job_id": message.body,
                                         "agency_code": a_agency_code,
                                         "is_retry": is_retry}
                    return job_signature

                found_message = dispatcher.dispatch_by_message_attribute(choose_job_by_message_attributes)

                if not found_message:
                    # Drop the Datadog trace, since no trace-worthy activity happened on this poll
                    tracer.context_provider.active().sampling_priority = USER_REJECT
                    span.set_tag(DatadogEagerlyDropTraceFilter.EAGERLY_DROP_TRACE_KEY, True)

                    # When you receive an empty response from the queue, wait before trying again
                    time.sleep(1)

                # If this process is exiting, don't poll for more work
                keep_polling = not dispatcher.is_exiting
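
# Retry detection above hinges on the queue-defined ApproximateReceiveCount attribute,
# which SQS increments on every delivery of the message. In isolation (message stands
# in for a boto3 SQS Message object):
def is_retry(message):
    """ Return True if SQS has delivered this message more than once. """
    receive_count = message.attributes.get('ApproximateReceiveCount')
    return receive_count is not None and int(receive_count) > 1
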
Example #16
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None, file_format='csv'):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if
        it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
            file_format: determines if the file generated is a txt or a csv
    """
    if not (StringCleaner.is_date(start_date)
            and StringCleaner.is_date(end_date)):
        raise ResponseException(
            "Start or end date cannot be parsed into a date of format MM/DD/YYYY",
            StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type,
                                                      agency_code, file_format)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(request_date=datetime.now().date(),
                                         start_date=job.start_date,
                                         end_date=job.end_date,
                                         file_type=job.file_type.letter_name,
                                         agency_code=agency_code,
                                         agency_type=agency_type,
                                         file_format=file_format,
                                         is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({
                'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                'message_type': 'BrokerInfo',
                'file_type': job.file_type.letter_name,
                'job_id': job.job_id,
                'submission_id': job.submission_id,
                'file_generation_id': file_generation.file_generation_id
            })

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id),
                               MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
Example #17
def run_app():
    """ Run the application. """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app,
                        tracer,
                        service="broker-dd",
                        distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        keep_polling = True
        while keep_polling:
            # With cleanup handling engaged, allowing retries
            dispatcher = SQSWorkDispatcher(queue)

            def choose_job_by_message_attributes(message):
                # Determine if this is a retry of this message, in which case job execution should know so it can
                # do cleanup before proceeding with the job
                q_msg_attr = message.attributes  # the non-user-defined (queue-defined) attributes on the message
                is_retry = False
                if q_msg_attr.get('ApproximateReceiveCount') is not None:
                    is_retry = int(q_msg_attr.get('ApproximateReceiveCount')) > 1

                msg_attr = message.message_attributes
                if msg_attr and msg_attr.get('validation_type', {}).get('StringValue') == 'generation':
                    # Generating a file
                    job_signature = {"_job": validator_process_file_generation,
                                     "file_gen_id": message.body,
                                     "is_retry": is_retry}
                else:
                    # Running validations (or generating a file from a Job)
                    a_agency_code = msg_attr.get('agency_code', {}).get('StringValue') if msg_attr else None
                    job_signature = {"_job": validator_process_job,
                                     "job_id": message.body,
                                     "agency_code": a_agency_code,
                                     "is_retry": is_retry}
                return job_signature

            found_message = dispatcher.dispatch_by_message_attribute(choose_job_by_message_attributes)

            # When you receive an empty response from the queue, wait before trying again
            if not found_message:
                time.sleep(1)

            # If this process is exiting, don't poll for more work
            keep_polling = not dispatcher.is_exiting
def run_app():
    """Run the application."""
    app = Flask(__name__)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        current_message = None
        messages = []  # keep the name bound for the finally block below
        while True:
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10)
                for message in messages:
                    logger.info("Message received: %s", message.body)
                    current_message = message
                    GlobalDB.db()
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")
                    validation_manager = ValidationManager(local, error_report_path)
                    validation_manager.validate_job(g.job_id)

                    # delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(str(e))

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # job passed prerequisites for validation, but an error
                        # happened somewhere: mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(str(e))

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # deleted messages raise an error, which is fine because they were handled
                        pass