def retrieve_cached_file_generation(job, agency_type, agency_code):
    """ Retrieves a cached FileGeneration for the D file request, if there is one.

        Args:
            job: Upload Job for the generation file
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code to generate file for

        Returns:
            FileGeneration object matching the criteria, or None
    """
    sess = GlobalDB.db().session
    logger.info({'message': 'Checking for a cached FileGeneration to pull file from', 'message_type': 'BrokerInfo',
                 'submission_id': job.submission_id, 'job_id': job.job_id, 'file_type': job.file_type.letter_name})

    # find current date and date of last FPDS pull
    current_date = datetime.now().date()
    last_update = sess.query(FPDSUpdate).one_or_none()
    fpds_date = last_update.update_date if last_update else current_date

    # check if a cached FileGeneration already exists using these criteria
    file_generation = None
    file_gen = sess.query(FileGeneration).filter(
        FileGeneration.start_date == job.start_date, FileGeneration.end_date == job.end_date,
        FileGeneration.agency_code == agency_code,  FileGeneration.agency_type == agency_type,
        FileGeneration.file_type == job.file_type.letter_name, FileGeneration.is_cached_file.is_(True)).one_or_none()

    if file_gen and (file_gen.file_type == 'D1' and file_gen.request_date < fpds_date):
        # Uncache expired D1 FileGeneration
        file_gen.is_cached_file = False
        sess.commit()
    elif file_gen:
        file_generation = file_gen

    return file_generation
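# Usage sketch (illustrative, not project source): try the cache before queuing a new
# D-file generation. `job` is assumed to be the upload Job for a D1 file, '020' is a
# placeholder agency code, and start_d_generation is the helper defined later in this
# listing.
cached_gen = retrieve_cached_file_generation(job, 'awarding', '020')
if cached_gen is None:
    start_d_generation(job, '01/01/2017', '03/31/2017', 'awarding', agency_code='020')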
def get_submission_data(submission, file_type=''):
    """ Get data for the submission specified

        Args:
            submission: submission to retrieve metadata for
            file_type: the file type to retrieve job metadata for ('cross' for cross-file validation)

        Returns:
            JsonResponse containing the error information or the object containing metadata for all relevant file types
    """
    sess = GlobalDB.db().session
    file_type = file_type.lower()

    # Make sure the file type provided is valid
    if file_type and file_type not in FILE_TYPE_DICT and file_type != 'cross':
        return JsonResponse.error(ValueError(file_type + ' is not a valid file type'), StatusCode.CLIENT_ERROR)

    # Make sure the file type provided is valid for the submission type
    is_fabs = submission.d2_submission
    if file_type and ((is_fabs and file_type != 'fabs') or (not is_fabs and file_type == 'fabs')):
        return JsonResponse.error(ValueError(file_type + ' is not a valid file type for this submission'),
                                  StatusCode.CLIENT_ERROR)

    job_query = sess.query(Job).filter(Job.submission_id == submission.submission_id)
    if not file_type:
        relevant_job_types = (JOB_TYPE_DICT['csv_record_validation'], JOB_TYPE_DICT['validation'])
        job_query = job_query.filter(Job.job_type_id.in_(relevant_job_types))
    elif file_type == 'cross':
        job_query = job_query.filter(Job.job_type_id == JOB_TYPE_DICT['validation'])
    else:
        job_query = job_query.filter(Job.file_type_id == FILE_TYPE_DICT[file_type])

    job_dict = {'jobs': [job_to_dict(job) for job in job_query]}
    return JsonResponse.create(StatusCode.OK, job_dict)
def start_a_generation(job, start_date, end_date, agency_code):
    """ Validates the start and end dates of the generation and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_code: Agency code for A file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type}
    logger.info(log_data)

    # Set SQS message attributes
    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}}

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
def load_frec(file_name):
    """ Load FREC (high-level agency names) lookup table.

        Args:
            file_name: path/url to the file to be read
    """
    sess = GlobalDB.db().session
    models = {frec.frec_code: frec for frec in sess.query(FREC)}

    # read FREC values from csv
    data = pd.read_csv(file_name, dtype=str)

    # clean data
    data = clean_data(
        data,
        FREC,
        {"frec": "frec_code", "cgac_agency_code": "cgac_code", "frec_entity_description": "agency_name",
         "agency_abbreviation": "agency_abbreviation"},
        {"frec": {"keep_null": False}, "cgac_code": {"pad_to_length": 3}, "frec_code": {"pad_to_length": 4}}
    )
    # de-dupe
    data.drop_duplicates(subset=['frec_code'], inplace=True)
    # create foreign key dicts
    cgac_dict = {str(cgac.cgac_code): cgac.cgac_id for
                 cgac in sess.query(CGAC).filter(CGAC.cgac_code.in_(data["cgac_code"])).all()}

    # insert to db
    delete_missing_frecs(models, data)
    update_frecs(models, data, cgac_dict)
    sess.add_all(models.values())
    sess.commit()

    logger.info('%s FREC records inserted', len(models))
def load_cgac(file_name):
    """ Load CGAC (high-level agency names) lookup table.

        Args:
            file_name: path/url to the file to be read
    """
    sess = GlobalDB.db().session
    models = {cgac.cgac_code: cgac for cgac in sess.query(CGAC)}

    # read CGAC values from csv
    data = pd.read_csv(file_name, dtype=str)
    # clean data
    data = clean_data(
        data,
        CGAC,
        {"cgac_agency_code": "cgac_code", "agency_name": "agency_name",
         "agency_abbreviation": "agency_abbreviation"},
        {"cgac_code": {"pad_to_length": 3}}
    )
    # de-dupe
    data.drop_duplicates(subset=['cgac_code'], inplace=True)

    delete_missing_cgacs(models, data)
    update_cgacs(models, data)
    sess.add_all(models.values())
    sess.commit()

    logger.info('%s CGAC records inserted', len(models))
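# Usage sketch (illustrative): load CGAC before FREC, since load_frec resolves each FREC
# row's cgac_code against CGAC records already in the database. The CSV paths below are
# placeholders.
load_cgac('agency_data/cgac.csv')
load_frec('agency_data/frec.csv')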
def main():
    """ Loads DUNS from the DUNS export file (comprised of DUNS pre-2014) """
    parser = argparse.ArgumentParser(description='Adding historical DUNS to Broker.')
    parser.add_argument('-size', '--block_size', help='Number of rows to batch load', type=int,
                        default=10000)
    args = parser.parse_args()

    sess = GlobalDB.db().session
    client = sam_config_is_valid()

    logger.info('Retrieving historical DUNS file')
    start = datetime.now()
    if CONFIG_BROKER["use_aws"]:
        s3_client = boto3.client('s3', region_name=CONFIG_BROKER['aws_region'])
        duns_file = s3_client.generate_presigned_url('get_object', {'Bucket': CONFIG_BROKER['archive_bucket'],
                                                                    'Key': "DUNS_export_deduped.csv"}, ExpiresIn=10000)
    else:
        duns_file = os.path.join(CONFIG_BROKER["broker_files"], "DUNS_export_deduped.csv")

    if not CONFIG_BROKER["use_aws"] and not os.path.isfile(duns_file):
        raise OSError("No DUNS_export_deduped.csv found.")

    logger.info("Retrieved historical DUNS file in {} s".format((datetime.now()-start).total_seconds()))

    try:
        run_duns_batches(duns_file, sess, client, args.block_size)
    except Exception as e:
        logger.exception(e)
        sess.rollback()

    logger.info("Updating historical DUNS complete")
    sess.close()
    def delete_submission(submission):
        """ Deletes all data associated with the specified submission
        NOTE: THERE IS NO WAY TO UNDO THIS """

        if submission.publish_status_id != PUBLISH_STATUS_DICT['unpublished']:
            return JsonResponse.error(ValueError("Submissions that have been certified cannot be deleted"),
                                      StatusCode.CLIENT_ERROR)

        sess = GlobalDB.db().session

        # Check if the submission has any jobs that are currently running, if so, do not allow deletion
        jobs = sess.query(Job).filter(Job.submission_id == submission.submission_id,
                                      Job.job_status_id == JOB_STATUS_DICT['running']).all()

        if jobs:
            return JsonResponse.error(ValueError("Submissions with running jobs cannot be deleted"),
                                      StatusCode.CLIENT_ERROR)

        sess.query(SubmissionSubTierAffiliation).filter(
            SubmissionSubTierAffiliation.submission_id == submission.submission_id).delete(
                synchronize_session=False)
        sess.query(Submission).filter(Submission.submission_id == submission.submission_id).delete(
            synchronize_session=False)
        sess.expire_all()

        return JsonResponse.create(StatusCode.OK, {"message": "Success"})
def main():
    parser = argparse.ArgumentParser(description='Update contract transaction and/or fabs rows based on updates to the '
                                                 'agency list')
    parser.add_argument('-a', '--missing_agency', help='Perform an update on 999 agency codes', action='store_true',
                        required=False, default=False)
    parser.add_argument('-s', '--subtier_code', help='Select specific subtier to update. Must be a 4-digit code',
                        type=str, required=False)
    parser.add_argument('-t', '--tables', help='Which tables (fabs, fpds, or both) to update. Defaults to both.',
                        required=False, default='both', choices=['fabs', 'fpds', 'both'])
    args = parser.parse_args()

    if not args.subtier_code and not args.missing_agency:
        logger.error('Missing either subtier_code or missing_agency argument')
    elif args.subtier_code and len(args.subtier_code) != 4:
        logger.error('Subtier not a correct format, must be 4 digits')
    else:
        sess = GlobalDB.db().session

        if args.tables in ('fpds', 'both'):
            update_table(sess, 'awarding', 'detached_award_procurement', args)
            update_table(sess, 'funding', 'detached_award_procurement', args)
            logger.info("Procurement Update Complete")

        if args.tables in ('fabs', 'both'):
            update_table(sess, 'awarding', 'published_award_financial_assistance', args)
            update_table(sess, 'funding', 'published_award_financial_assistance', args)
            logger.info("Award Financial Assistance Update Complete")
def main():
    """ Load historical FABS data from the archived FABS files into the PublishedAwardFinancialAssistance table """
    sess = GlobalDB.db().session

    # delete any data in the PublishedAwardFinancialAssistance table
    logger.info('deleting PublishedAwardFinancialAssistance data')
    sess.query(PublishedAwardFinancialAssistance).delete(synchronize_session=False)
    sess.commit()

    if CONFIG_BROKER["use_aws"]:
        s3connection = boto.s3.connect_to_region(CONFIG_BROKER['aws_region'])
        s3bucket = s3connection.lookup(CONFIG_BROKER['archive_bucket'])
        for key in s3bucket.list():
            if re.match(r'^\d{4}_All_(DirectPayments|Grants|Insurance|Loans|Other)_Full_\d{8}.csv.zip', key.name):
                file_path = key.generate_url(expires_in=600)
                parse_fabs_file(urllib.request.urlopen(file_path), sess)
    else:
        base_path = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "fabs")
        file_list = os.listdir(base_path)
        for file in file_list:
            if re.match(r'^\d{4}_All_(DirectPayments|Grants|Insurance|Loans|Other)_Full_\d{8}.csv.zip', file):
                parse_fabs_file(open(os.path.join(base_path, file)), sess)

    set_active_rows(sess)

    logger.info("Historical FABS script complete")
def update_offices(csv_path):
    """ Load office data from the provided CSV and replace/insert any office lookups.

        Args:
            csv_path: path/url to the file to read from
    """
    sess = GlobalDB.db().session

    data = clean_office(csv_path)
    add_existing_id(data)

    old_data = data[data['existing_id'].notnull()]
    del old_data['existing_id']

    new_data = data[data['existing_id'].isnull()]
    del new_data['existing_id']

    # instead of using the pandas to_sql dataframe method like some of the other domain load processes, iterate through
    # the dataframe rows so we can load using the orm model (note: toyed with the SQLAlchemy bulk load options but
    # ultimately decided not to go outside the unit of work for the sake of a performance gain)
    for _, row in old_data.iterrows():
        sess.query(FPDSContractingOffice).filter_by(contracting_office_code=row['contracting_office_code'])\
            .update(row, synchronize_session=False)

    for _, row in new_data.iterrows():
        sess.add(FPDSContractingOffice(**row))

    sess.commit()
    logger.info('%s records in CSV, %s existing', len(data.index), sum(data['existing_id'].notnull()))
def submission_procurements(submission_id):
    """Fetch procurements and subcontracts"""
    sess = GlobalDB.db().session

    logger.debug('Starting submission procurements')

    award_proc_sub = sess.query(AwardProcurement.piid, AwardProcurement.parent_award_id,
                                AwardProcurement.naics_description, AwardProcurement.awarding_sub_tier_agency_c,
                                AwardProcurement.submission_id).\
        filter(AwardProcurement.submission_id == submission_id).distinct().cte("award_proc_sub")

    results = sess.query(award_proc_sub, FSRSProcurement, FSRSSubcontract).\
        filter(FSRSProcurement.contract_number == award_proc_sub.c.piid).\
        filter(FSRSProcurement.idv_reference_number.isnot_distinct_from(award_proc_sub.c.parent_award_id)).\
        filter(FSRSProcurement.contracting_office_aid == award_proc_sub.c.awarding_sub_tier_agency_c).\
        filter(FSRSSubcontract.parent_id == FSRSProcurement.id)

    # The cte returns a set of columns, not an AwardProcurement object, so we have to unpack each column
    for award_piid, award_parent_id, award_naics_desc, award_sub_tier, award_sub_id, proc, sub in results:
        # need to combine those columns again here so we can get a proper ModelRow
        award = AwardProcurement(piid=award_piid, parent_award_id=award_parent_id, naics_description=award_naics_desc,
                                 awarding_sub_tier_agency_c=award_sub_tier, submission_id=award_sub_id)
        yield ModelRow(award, proc, sub, naics_desc=award.naics_description)

    logger.debug('Finished submission procurements')
def main():
    """ Remove the FPDS IDV records listed in the delete file """
    sess = GlobalDB.db().session
    start = datetime.datetime.now()
    logger.info("FPDS IDV delete started")

    # get and read the file
    del_file = get_delete_file()
    data = pd.read_csv(del_file, dtype=str, encoding='utf_8_sig')

    # Clean up the data so it's usable
    data = clean_delete_data(data)

    # Gather list of records to delete
    gather_start = datetime.datetime.now()
    logger.info("Starting gathering of records to delete.")
    delete_list, delete_dict = get_deletes(sess, data)
    gather_end = datetime.datetime.now()
    logger.info("Finished gathering records in {} seconds. Total records to delete: {}".format(gather_end-gather_start,
                                                                                               len(delete_list)))
    # Delete records
    logger.info("Deleting records")
    delete_records(sess, delete_list, delete_dict)
    sess.commit()

    end = datetime.datetime.now()
    logger.info("FPDS IDV delete finished in %s seconds", end-start)
def insert_from_table(file_type_id, submission_id):
    """ Insert the data from the base staging table into the corresponding Certified table.

        Args:
            file_type_id: Database file type ID for files A, B, or C
            submission_id: Database ID for the submission being loaded
    """
    sess = GlobalDB.db().session
    logger.info('Copying submission {} data from base table into {}'.format(submission_id,
                                                                            FTI_TABLENAME_DICT[file_type_id]))

    table_type = FTI_BASETABLE_DICT[file_type_id].__table__.name
    column_list = [col.key for col in FTI_BASETABLE_DICT[file_type_id].__table__.columns]
    column_list.remove('created_at')
    column_list.remove('updated_at')
    column_list.remove(table_type + '_id')
    col_string = ", ".join(column_list)

    executed = sess.execute(
        "INSERT INTO certified_{} (created_at, updated_at, {}) "
        "SELECT NOW() AS created_at, NOW() AS updated_at, {} "
        "FROM {} "
        "WHERE submission_id={}".format(table_type, col_string, col_string, table_type, submission_id))
    sess.commit()

    logger.info('Loaded {} records into the {} table'.format(executed.rowcount, FTI_TABLENAME_DICT[file_type_id]))
def list_all_agencies():
    """ List all CGAC agencies and shared (FREC) agencies """
    sess = GlobalDB.db().session
    cgacs = sess.query(CGAC).all()
    agency_list = [{'agency_name': cgac.agency_name, 'cgac_code': cgac.cgac_code} for cgac in cgacs]
    frecs = sess.query(FREC).all()
    shared_list = [{'agency_name': frec.agency_name, 'frec_code': frec.frec_code} for frec in frecs]
    return JsonResponse.create(StatusCode.OK, {'agency_list': agency_list, 'shared_agency_list': shared_list})
def create_generation_job(file_type_name, start_date, end_date):
    """ Add details to jobs for generating files

        Args:
            file_type_name: the name of the file type being generated
            start_date: The start date for the generation job, only used for detached files
            end_date: The end date for the generation job, only used for detached files

        Returns:
            the file generation job
    """
    sess = GlobalDB.db().session

    # Create a new job for a detached generation
    job = Job(job_type_id=lookups.JOB_TYPE_DICT['file_upload'], user_id=g.user.user_id,
              file_type_id=lookups.FILE_TYPE_DICT[file_type_name], start_date=start_date, end_date=end_date)
    sess.add(job)

    # Update the job details
    job.message = None
    job.job_status_id = lookups.JOB_STATUS_DICT["ready"]
    sess.commit()
    sess.refresh(job)

    return job
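# Usage sketch (illustrative): create a detached D1 generation job and queue it via
# start_a_generation (defined earlier in this listing). 'award_procurement' is the
# broker's file type name for D1; the dates and agency code are placeholders.
d1_job = create_generation_job('award_procurement', '01/01/2017', '03/31/2017')
start_a_generation(d1_job, '01/01/2017', '03/31/2017', '020')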
    def set_skip_guide(self):
        """ Set current user's skip guide parameter

            Returns:
                JsonResponse object containing results of setting the skip guide or details of the error that occurred.
                Possible errors include the request not containing a skip_guide parameter or it not being a boolean
                value
        """
        sess = GlobalDB.db().session
        request_dict = RequestDictionary.derive(self.request)
        try:
            if 'skip_guide' not in request_dict:
                raise ResponseException(
                    "Must include skip_guide parameter",
                    StatusCode.CLIENT_ERROR
                )
            skip_guide = str(request_dict['skip_guide']).lower()
            if skip_guide not in ("true", "false"):
                raise ResponseException(
                    "skip_guide must be true or false",
                    StatusCode.CLIENT_ERROR
                )
            g.user.skip_guide = skip_guide == "true"
        except ResponseException as exc:
            return JsonResponse.error(exc, exc.status)
        sess.commit()
        return JsonResponse.create(StatusCode.OK, {"message": "skip_guide set successfully", "skip_guide": skip_guide})
def check_generation_prereqs(submission_id, file_type):
    """ Make sure the prerequisite jobs for this file type are complete without errors.

        Args:
            submission_id: the submission id for which we're checking file generation prerequisites
            file_type: the type of file being generated

        Returns:
            A boolean indicating if the job has no incomplete prerequisites (True if the job is clear to start)
    """
    sess = GlobalDB.db().session
    prereq_query = sess.query(Job).filter(Job.submission_id == submission_id,
                                          or_(Job.job_status_id != lookups.JOB_STATUS_DICT['finished'],
                                              Job.number_of_errors > 0))

    # Check cross-file validation if generating E or F
    if file_type in ['E', 'F']:
        unfinished_prereqs = prereq_query.filter(Job.job_type_id == lookups.JOB_TYPE_DICT['validation']).count()
    # Check A, B, C files if generating a D file
    elif file_type in ['D1', 'D2']:
        unfinished_prereqs = prereq_query.filter(Job.file_type_id.in_(
            [lookups.FILE_TYPE_DICT['appropriations'], lookups.FILE_TYPE_DICT['program_activity'],
             lookups.FILE_TYPE_DICT['award_financial']])).count()
    else:
        raise ResponseException('Invalid type for file generation', StatusCode.CLIENT_ERROR)

    return unfinished_prereqs == 0
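# Usage sketch (illustrative): callers can use this check to refuse a within-submission
# D-file generation until the A/B/C validations have finished without errors. The
# submission id below is a placeholder.
if not check_generation_prereqs(submission_id=1234, file_type='D1'):
    raise ResponseException('Prerequisite jobs must be completed without errors before generating D1',
                            StatusCode.CLIENT_ERROR)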
def main():
    """ Update business categories for existing FPDS and/or FABS transaction data """
    sess = GlobalDB.db().session

    parser = argparse.ArgumentParser(description='Update business categories for existing transaction data')
    parser.add_argument('-fpds', help='Update only FPDS business categories', action='store_true')
    parser.add_argument('-fabs', help='Update only FABS business categories', action='store_true')
    args = parser.parse_args()

    overall_start = datetime.now()
    logger.info("Starting business categories updates")

    start = datetime.now()
    logger.info("Recreating business category SQL functions")
    sess.execute(BUSINESS_CATEGORY_FUNCTION_SQL)
    sess.commit()
    logger.info("Finished recreating business category SQL functions in %s seconds" % str(datetime.now() - start))

    if not (args.fpds or args.fabs):
        update_fpds_business_categories(sess)
        update_fabs_business_categories(sess)
    elif args.fpds:
        update_fpds_business_categories(sess)
    elif args.fabs:
        update_fabs_business_categories(sess)

    logger.info("Completed business categories updates in %s seconds" % str(datetime.now() - overall_start))
    def submit_files():
        file_manager = FileHandler(request, is_local=is_local, server_path=server_path)

        sess = GlobalDB.db().session

        start_date = request.json.get('reporting_period_start_date')
        end_date = request.json.get('reporting_period_end_date')
        is_quarter = request.json.get('is_quarter_format', False)

        if not (start_date is None or end_date is None):
            formatted_start_date, formatted_end_date = FileHandler.check_submission_dates(start_date,
                                                                                          end_date, is_quarter)

            submissions = sess.query(Submission).filter(
                Submission.cgac_code == request.json.get('cgac_code'),
                Submission.frec_code == request.json.get('frec_code'),
                Submission.reporting_start_date == formatted_start_date,
                Submission.reporting_end_date == formatted_end_date,
                Submission.is_quarter_format == is_quarter,
                Submission.publish_status_id != PUBLISH_STATUS_DICT['unpublished'])

            if 'existing_submission_id' in request.json:
                submissions = submissions.filter(
                    Submission.submission_id != request.json['existing_submission_id'])

            submissions = submissions.order_by(desc(Submission.created_at))

            if submissions.count() > 0:
                data = {
                        "message": "A submission with the same period already exists.",
                        "submissionId": submissions[0].submission_id
                }
                return JsonResponse.create(StatusCode.CLIENT_ERROR, data)

        return file_manager.submit(create_credentials)
def read_zips():
    with create_app().app_context():
        sess = GlobalDB.db().session

        # delete old values in case something changed and one is now invalid
        sess.query(Zips).delete(synchronize_session=False)
        sess.commit()

        if CONFIG_BROKER["use_aws"]:
            s3connection = boto.s3.connect_to_region(CONFIG_BROKER['aws_region'])
            s3bucket = s3connection.lookup(CONFIG_BROKER['sf_133_bucket'])
            zip_folder = CONFIG_BROKER["zip_folder"] + "/"
            for key in s3bucket.list(prefix=zip_folder):
                if key.name != zip_folder:
                    zip_4_file_path = key.generate_url(expires_in=600)
                    parse_zip4_file(urllib.request.urlopen(zip_4_file_path), sess)

            # parse remaining 5 digit zips that weren't in the first file
            citystate_file = s3bucket.get_key("ctystate.txt").generate_url(expires_in=600)
            parse_citystate_file(urllib.request.urlopen(citystate_file), sess)
        else:
            base_path = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", CONFIG_BROKER["zip_folder"])
            # creating the list while ignoring hidden files on mac
            file_list = [f for f in os.listdir(base_path) if not re.match(r'^\.', f)]
            for file in file_list:
                parse_zip4_file(open(os.path.join(base_path, file)), sess)

            # parse remaining 5 digit zips that weren't in the first file
            citystate_file = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "ctystate.txt")
            parse_citystate_file(open(citystate_file), sess)

        logger.info("Zipcode script complete")
def setup_emails():
    """Create email templates from model metadata."""
    with create_app().app_context():
        sess = GlobalDB.db().session

        # insert email template types
        type_list = [
            ('review_submission', '')
        ]
        for t in type_list:
            email_id = sess.query(
                EmailTemplateType.email_template_type_id).filter(
                EmailTemplateType.name == t[0]).one_or_none()
            if not email_id:
                email_type = EmailTemplateType(name=t[0], description=t[1])
                sess.add(email_type)

        sess.commit()

        # insert email templates

        # Submission Review
        template = ("[REV_USER_NAME] has shared a DATA Act broker submission with you from [REV_AGENCY]. Click "
                    "<a href='[REV_URL]'>here</a> to review their submission. For questions or comments, please visit "
                    "the Service Desk at https://servicedesk.usaspending.gov/ or e-mail [email protected].")
        load_email_template(sess, "DATA Act Broker - Submission Ready for Review", template, "review_submission")
def get_fabs_meta(submission_id):
    """Return the total rows, valid rows, publish date, and publish file for FABS submissions"""
    sess = GlobalDB.db().session

    # get row counts from the DetachedAwardFinancialAssistance table
    dafa = DetachedAwardFinancialAssistance
    total_rows = sess.query(dafa).filter(dafa.submission_id == submission_id)
    valid_rows = total_rows.filter(dafa.is_valid)

    # retrieve the published data and file
    submission = sess.query(Submission).filter(Submission.submission_id == submission_id).one()
    publish_date, published_file = None, None
    certify_data = get_lastest_certified_date(submission, is_fabs=True)

    try:
        iter(certify_data)
    except TypeError:
        publish_date = certify_data
    else:
        publish_date, file_path = certify_data
        if CONFIG_BROKER["use_aws"] and file_path:
            path, file_name = file_path.rsplit('/', 1)  # split by last instance of /
            published_file = S3Handler().get_signed_url(path=path, file_name=file_name,
                                                        bucket_route=CONFIG_BROKER['certified_bucket'],
                                                        url_mapping=CONFIG_BROKER["certified_bucket_mapping"],
                                                        method="get_object")
        elif file_path:
            published_file = file_path

    return {
        'valid_rows': valid_rows.count(),
        'total_rows': total_rows.count(),
        'publish_date': publish_date.strftime('%-I:%M%p %m/%d/%Y') if publish_date else None,
        'published_file': published_file
    }
    def setUpClass(cls):
        """Set up class-wide resources like submissions and jobs."""
        super(UserTests, cls).setUpClass()

        with create_app().app_context():
            sess = GlobalDB.db().session

            # Add submissions for agency user
            sess.query(Submission).filter(Submission.user_id == cls.agency_user_id).delete()
            sess.commit()
            for i in range(0, 6):
                sub = Submission(user_id=cls.agency_user_id)
                sub.reporting_start_date = datetime(2015, 10, 1)
                sub.reporting_end_date = datetime(2015, 12, 31)
                sub.cgac_code = cls.admin_cgac_code
                sess.add(sub)
                sess.commit()
                if i == 0:
                    cls.submission_id = sub.submission_id

            # Add job to first submission
            job = Job(
                submission_id=cls.submission_id,
                job_status_id=JOB_STATUS_DICT['running'],
                job_type_id=JOB_TYPE_DICT['file_upload'],
                file_type_id=FILE_TYPE_DICT['appropriations']
            )
            sess.add(job)
            sess.commit()
            cls.uploadId = job.job_id
def write_file_error(job_id, filename, error_type, extra_info=None):
    """ Write a file-level error to the file table

    Args:
        job_id: ID of job in job tracker
        filename: name of error report in S3
        error_type: type of error, value will be mapped to ValidationError class
        extra_info: list of extra information to be included in file
    """
    sess = GlobalDB.db().session
    try:
        int(job_id)
    except (TypeError, ValueError):
        raise ValueError("".join(["Bad jobId: ", str(job_id)]))

    # Get File object for this job ID or create it if it doesn't exist
    file_rec = create_file_if_needed(job_id, filename)

    # Mark error type and add header info if present
    file_rec.file_status_id = FILE_STATUS_DICT[ValidationError.get_error_type_string(error_type)]
    if extra_info is not None:
        if "missing_headers" in extra_info:
            file_rec.headers_missing = extra_info["missing_headers"]
        if "duplicated_headers" in extra_info:
            file_rec.headers_duplicated = extra_info["duplicated_headers"]

    sess.add(file_rec)
    sess.commit()
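# Usage sketch (illustrative): record a missing-header error for an upload job. The
# extra_info keys are the two that write_file_error inspects; the job id, filename, and
# ValidationError.missingHeaderError constant are placeholders/assumptions here.
write_file_error(1234, 'errors/appropriations_error_report.csv', ValidationError.missingHeaderError,
                 extra_info={'missing_headers': 'budget_authority_available_cpe'})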
def get_window():
    """ Return a query for submission windows that include the current date """
    sess = GlobalDB.db().session

    curr_date = datetime.now().date()

    return sess.query(SubmissionWindow).filter(
                                            SubmissionWindow.start_date <= curr_date,
                                            SubmissionWindow.end_date >= curr_date)
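# Usage sketch (illustrative): a hypothetical helper that reports whether today falls
# inside any submission window returned by get_window().
def is_submission_window_open():
    return get_window().count() > 0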
    def check_year_and_quarter(cgac_code, frec_code, reporting_fiscal_year, reporting_fiscal_period):
        """ Check if cgac (or frec) code, year, and quarter already has a published submission """
        if not cgac_code and not frec_code:
            return JsonResponse.error(ValueError("CGAC or FR Entity Code required"), StatusCode.CLIENT_ERROR)

        sess = GlobalDB.db().session
        return find_existing_submissions_in_period(sess, cgac_code, frec_code, reporting_fiscal_year,
                                                   reporting_fiscal_period)
def before_request():
    # Set global value for local
    g.is_local = local
    sess = GlobalDB.db().session
    # setup user
    g.user = None
    if session.get('name') is not None:
        g.user = sess.query(User).filter_by(user_id=session['name']).one_or_none()
def update_tas_ids(model_class, submission_id):
    """ Fill in tas_id for the given model's rows in this submission using the matching CARS subquery """
    sess = GlobalDB.db().session
    submission = sess.query(Submission).filter_by(submission_id=submission_id).one()

    subquery = matching_cars_subquery(sess, model_class, submission.reporting_start_date, submission.reporting_end_date)
    sess.query(model_class).filter_by(submission_id=submission_id).\
        update({getattr(model_class, 'tas_id'): subquery}, synchronize_session=False)
    sess.commit()
def test_w_perms_broker_user(create_session_mock, max_dict_mock, database, monkeypatch):
    ah = max_login_func(create_session_mock, max_dict_mock, monkeypatch, MAX_RESPONSE_W_PERMS)
    res = ah.max_login({})
    sess = GlobalDB.db().session
    # This is to prevent an integrity error with other tests that create users.
    sess.query(User).filter(func.lower(User.email) == func.lower("*****@*****.**"))\
        .delete(synchronize_session=False)
    sess.commit()
    assert res is True
def load_tas(backfill_historic=False):
    """ Load TAS file into broker database.

        Args:
            backfill_historic: if set to true, this will only update certain columns if budget_function_code is null
    """
    # read TAS file to dataframe, to make sure all is well with the file before firing up a db transaction
    tas_files = []

    if CONFIG_BROKER["use_aws"]:
        # Storing version dictionaries in the list to prevent getting all the links at once and possibly work with
        # expired AWS links
        s3connection = boto3.client('s3', region_name=CONFIG_BROKER['aws_region'])
        # list_object_versions returns the versions in reverse chronological order

        if not backfill_historic:
            # get the latest tas_file
            tas_files = [s3connection.generate_presigned_url(ClientMethod='get_object',
                                                             Params={'Bucket': CONFIG_BROKER['sf_133_bucket'],
                                                                     'Key': 'cars_tas.csv'}, ExpiresIn=600)]
        else:
            tas_files = s3connection.list_object_versions(Bucket=CONFIG_BROKER['sf_133_bucket'],
                                                          Prefix='cars_tas.csv')['Versions']
            # getting the latest file (see the reversed) from each day for performance and accuracy
            tas_files_grouped = {tas_file['LastModified'].date(): tas_file for tas_file in reversed(tas_files)}
            # sorting them back to chronological order
            tas_files = sorted([tas_file for date, tas_file in tas_files_grouped.items()],
                               key=lambda k: k['LastModified'])
    elif backfill_historic:
        logger.error('Unable to attain historical versions of cars_tas without aws access.')
        return
    else:
        tas_file = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "cars_tas.csv")
        tas_files.append(tas_file)

    sess = GlobalDB.db().session
    for tas_file in reversed(tas_files):
        update_missing = missing_records(sess) if backfill_historic else []
        if backfill_historic and not update_missing:
            # no more missing, we're done here
            break
        if CONFIG_BROKER["use_aws"] and backfill_historic:
            # generate url from the version dict
            logger.info('Working with remote cars_tas.csv (from {})'.format(tas_file['LastModified']))
            tas_file = s3connection.generate_presigned_url(
                ClientMethod='get_object',
                Params={
                    'Bucket': CONFIG_BROKER['sf_133_bucket'],
                    'Key': 'cars_tas.csv',
                    'VersionId': tas_file['VersionId']
                },
                ExpiresIn=600
            )
        elif CONFIG_BROKER["use_aws"]:
            logger.info('Working with latest remote cars_tas.csv')
        else:
            logger.info('Working with local cars_tas.csv')
        update_tas_lookups(sess, tas_file, update_missing=update_missing)
    def setUpClass(cls):
        """Set up resources to be shared within a test class"""
        cls.session_id = ""

        with createValidatorApp().app_context():

            # update application's db config options so unittests
            # run against test databases
            suite = cls.__name__.lower()
            config = dataactcore.config.CONFIG_DB
            cls.num = randint(1, 9999)
            config['db_name'] = 'unittest{}_{}_data_broker'.format(
                cls.num, suite)
            dataactcore.config.CONFIG_DB = config
            createDatabase(CONFIG_DB['db_name'])
            runMigrations()

            # drop and re-create test user db/tables
            setupUserDB()
            # drop and re-create test job db/tables
            setupJobTrackerDB()
            # drop and re-create test error db/tables
            setupErrorDB()
            # drop and re-create test validation db/tables
            setupValidationDB()
            # load e-mail templates
            setupEmails()

            # set up default e-mails for tests
            test_users = {}
            test_users['admin_email'] = '*****@*****.**'
            test_users['change_user_email'] = '*****@*****.**'
            test_users['password_reset_email'] = '*****@*****.**'
            test_users['inactive_email'] = '*****@*****.**'
            test_users['password_lock_email'] = '*****@*****.**'
            test_users['expired_lock_email'] = '*****@*****.**'
            test_users['agency_admin_email'] = '*****@*****.**'

            # this email is for a regular agency_user email that is to be used for
            # testing functionality expected by a normal, base user
            test_users['agency_user'] = '******'
            test_users['approved_email'] = '*****@*****.**'
            test_users['submission_email'] = '*****@*****.**'
            user_password = '******'
            admin_password = '******'

            # set up users for status tests
            StatusTestUser = namedtuple(
                'StatusTestUser',
                ['email', 'user_status', 'permissions', 'user_type'])
            StatusTestUser.__new__.__defaults__ = (None, None,
                                                   AccountType.AGENCY_USER,
                                                   None)
            status_test_users = []
            status_test_users.append(
                StatusTestUser('*****@*****.**', 'awaiting_confirmation', 0))
            status_test_users.append(
                StatusTestUser('*****@*****.**', 'email_confirmed'))
            status_test_users.append(
                StatusTestUser('*****@*****.**', 'awaiting_approval'))
            status_test_users.append(
                StatusTestUser('*****@*****.**', 'awaiting_approval'))
            status_test_users.append(
                StatusTestUser('*****@*****.**',
                               'awaiting_approval'))
            status_test_users.append(
                StatusTestUser(
                    test_users['admin_email'], 'approved',
                    AccountType.WEBSITE_ADMIN + AccountType.AGENCY_USER))
            status_test_users.append(
                StatusTestUser(test_users['approved_email'], 'approved'))
            status_test_users.append(
                StatusTestUser('*****@*****.**', 'denied'))

            # add new users
            createUserWithPassword(test_users["submission_email"],
                                   user_password, Bcrypt())
            createUserWithPassword(test_users["change_user_email"],
                                   user_password, Bcrypt())
            createUserWithPassword(test_users["password_reset_email"],
                                   user_password, Bcrypt())
            createUserWithPassword(test_users["inactive_email"], user_password,
                                   Bcrypt())
            createUserWithPassword(test_users["password_lock_email"],
                                   user_password, Bcrypt())
            createUserWithPassword(test_users['expired_lock_email'],
                                   user_password, Bcrypt())
            createUserWithPassword(test_users['agency_admin_email'],
                                   admin_password,
                                   Bcrypt(),
                                   permission=4)
            createUserWithPassword(test_users['agency_user'], user_password,
                                   Bcrypt())

            # get user info and save as class variables for use by tests

            sess = GlobalDB.db().session

            agencyUser = sess.query(User).filter(
                User.email == test_users['agency_user']).one()
            cls.agency_user_id = agencyUser.user_id

            # set the specified account to be expired
            expiredUser = sess.query(User).filter(
                User.email == test_users['expired_lock_email']).one()
            today = parse(time.strftime("%c"))
            expiredUser.last_login_date = (today -
                                           timedelta(days=120)).strftime("%c")
            sess.add(expiredUser)

            # create users for status testing
            for u in status_test_users:
                user = User(email=u.email,
                            permissions=u.permissions,
                            user_status=sess.query(UserStatus).filter(
                                UserStatus.name == u.user_status).one())
                sess.add(user)

            # set up approved user
            user = sess.query(User).filter(
                User.email == test_users['approved_email']).one()
            user.username = "******"
            user.cgac_code = "000"
            user.salt, user.password_hash = getPasswordHash(
                user_password, Bcrypt())
            sess.add(user)
            cls.approved_user_id = user.user_id

            # set up admin user
            admin = sess.query(User).filter(
                User.email == test_users['admin_email']).one()
            admin.salt, admin.password_hash = getPasswordHash(
                admin_password, Bcrypt())
            admin.name = "Mr. Manager"
            admin.cgac_code = "SYS"
            sess.add(admin)

            # set up status changed user
            statusChangedUser = sess.query(User).filter(
                User.email == test_users['change_user_email']).one()
            statusChangedUser.name = "Test User"
            statusChangedUser.user_status = sess.query(UserStatus).filter(
                UserStatus.name == 'email_confirmed').one()
            sess.add(statusChangedUser)
            cls.status_change_user_id = statusChangedUser.user_id

            # set up deactivated user
            deactivated_user = sess.query(User).filter(
                User.email == test_users['inactive_email']).one()
            deactivated_user.last_login_date = time.strftime("%c")
            deactivated_user.is_active = False
            sess.add(deactivated_user)

            sess.commit()

        # get lookup dictionaries
        cls.jobStatusDict = lookups.JOB_STATUS_DICT
        cls.jobTypeDict = lookups.JOB_TYPE_DICT
        cls.fileTypeDict = lookups.FILE_TYPE_DICT
        cls.fileStatusDict = lookups.FILE_STATUS_DICT
        cls.ruleSeverityDict = lookups.RULE_SEVERITY_DICT
        cls.errorTypeDict = lookups.ERROR_TYPE_DICT
        cls.publishStatusDict = lookups.PUBLISH_STATUS_DICT
        cls.userStatusDict = lookups.USER_STATUS_DICT

        # set up info needed by the individual test classes
        cls.test_users = test_users
        cls.user_password = user_password
        cls.admin_password = admin_password
        cls.local = CONFIG_BROKER['local']
def receive_messages(WaitTimeSeconds, MessageAttributeNames=None):  # noqa
    sess = GlobalDB.db().session
    messages = []
    for sqs in sess.query(SQS):
        messages.append(SQSMockMessage(sqs))
    return messages
def add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission):
    """ Add upload and validation jobs for a single filetype

    Arguments:
        upload_file: UploadFile named tuple
        submission_id: submission ID to attach to jobs
        existing_submission: true if we should update existing jobs rather than creating new ones

    Returns:
        the validation job id for this file type (if any)
        the upload job id for this file type
    """
    sess = GlobalDB.db().session

    file_type_id = FILE_TYPE_DICT[upload_file.file_type]
    validation_job_id = None

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.

    if existing_submission:
        # mark existing upload job as running
        upload_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['file_upload']
        ).one()
        # mark as running and set new file name and path
        upload_job.job_status_id = JOB_STATUS_DICT['running']
        upload_job.original_filename = upload_file.file_name
        upload_job.filename = upload_file.upload_name

    else:
        if upload_file.file_type in ["award", "award_procurement"]:
            # file generation handled on backend, mark as ready
            upload_status = JOB_STATUS_DICT['ready']
        elif upload_file.file_type in ["awardee_attributes", "sub_award"]:
            # these are dependent on file D2 validation
            upload_status = JOB_STATUS_DICT['waiting']
        else:
            # mark as running since frontend should be doing this upload
            upload_status = JOB_STATUS_DICT['running']

        upload_job = Job(
            original_filename=upload_file.file_name,
            filename=upload_file.upload_name,
            file_type_id=file_type_id,
            job_status_id=upload_status,
            job_type_id=JOB_TYPE_DICT['file_upload'],
            submission_id=submission_id)
        sess.add(upload_job)
        sess.flush()

    if existing_submission:
        # if the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error metadata) that
        # might exist from a previous run.
        val_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']
        ).one()
        val_job.job_status_id = JOB_STATUS_DICT['waiting']
        val_job.original_filename = upload_file.file_name
        val_job.filename = upload_file.upload_name
        # reset file size and number of rows to be set during validation of new file
        val_job.file_size = None
        val_job.number_of_rows = None
        # delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == val_job.job_id).\
            delete(synchronize_session='fetch')
        # delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == val_job.job_id).delete(synchronize_session='fetch')

    else:
        # create a new record validation job and add dependencies if necessary
        if upload_file.file_type == "awardee_attributes":
            d1_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if d1_val_job is None:
                raise Exception("Cannot create E job without a D1 job")
            # Add dependency on D1 validation job
            d1_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=d1_val_job.job_id)
            sess.add(d1_dependency)

        elif upload_file.file_type == "sub_award":
            # look up the C validation job that this F job depends on
            c_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if c_val_job is None:
                raise Exception("Cannot create F job without a C job")
            # add dependency on C validation job
            c_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=c_val_job.job_id)
            sess.add(c_dependency)

        else:
            # E and F don't get record validation jobs; create one for every other file type
            val_job = Job(
                original_filename=upload_file.file_name,
                filename=upload_file.upload_name,
                file_type_id=file_type_id,
                job_status_id=JOB_STATUS_DICT['waiting'],
                job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                submission_id=submission_id)
            sess.add(val_job)
            sess.flush()
            # add dependency between file upload job and file validation job
            upload_dependency = JobDependency(job_id=val_job.job_id, prerequisite_id=upload_job.job_id)
            sess.add(upload_dependency)
            validation_job_id = val_job.job_id

    sess.commit()

    return validation_job_id, upload_job.job_id
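# Usage sketch (illustrative): add upload and validation jobs for a newly uploaded B file
# on a brand-new submission. UploadFile stands in for the named tuple mentioned in the
# docstring; its field names match the attributes the function reads, but the namedtuple
# definition and the values below are placeholders.
from collections import namedtuple

UploadFile = namedtuple('UploadFile', ['file_type', 'upload_name', 'file_name'])
upload = UploadFile(file_type='program_activity', upload_name='1234/program_activity.csv',
                    file_name='program_activity.csv')
val_job_id, upload_job_id = add_jobs_for_uploaded_file(upload, submission_id=1234, existing_submission=False)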
def setUserActive(user, is_active):
    """ Sets the is_active field for the specified user """
    sess = GlobalDB.db().session
    user.is_active = is_active
    sess.commit()
def main():
    parser = argparse.ArgumentParser(
        description=
        'Update county information for historical FABS and FPDS data')
    parser.add_argument('-t',
                        '--type',
                        help='Which data type, argument must be fpds or fabs',
                        nargs=1,
                        type=str,
                        required=True)
    parser.add_argument('-s',
                        '--start',
                        help='Start id, must be number',
                        nargs=1,
                        type=int,
                        required=True)
    parser.add_argument('-e',
                        '--end',
                        help='End id, must be number',
                        nargs=1,
                        type=int,
                        required=True)
    args = parser.parse_args()

    sess = GlobalDB.db().session

    data_type = args.type[0]

    global g_country_list
    global g_state_by_code
    global g_state_code_by_fips
    global g_state_by_name
    global g_zip_list
    global g_county_by_city
    global g_county_by_code
    global g_county_by_name

    logger.info("Starting location dictionary compilation")

    # get and create list of country code -> name mappings
    countries = sess.query(CountryCode).all()

    for country in countries:
        g_country_list[country.country_code] = country.country_name
    del countries

    # get and create list of state code -> state name mappings. Prime the county lists with state codes
    states = sess.query(States).all()

    for state in states:
        g_county_by_name[state.state_code] = {}
        g_county_by_code[state.state_code] = {}

        # we want to capitalize it if it's FPDS because that's how we store it
        state_name = state.state_name
        state_code = state.state_code
        if data_type == 'fpds':
            state_name = state_name.upper()
        g_state_by_code[state_code] = state_name
        g_state_code_by_fips[state.fips_code] = state_code
        g_state_by_name[state_name.upper()] = state_code
    del states

    # Fill the county lists with data (code -> name mappings and name -> code mappings)
    county_codes = sess.query(CountyCode.county_number, CountyCode.state_code,
                              CountyCode.county_name).all()

    for county_code in county_codes:
        state_code = county_code.state_code
        county_num = county_code.county_number
        county_name = county_code.county_name.strip()

        if data_type == 'fpds':
            # we don't want any "(CA)" endings for FPDS, so strip those (also strip all extra whitespace)
            county_name = county_name.replace(' (CA)', '').strip().upper()

        # we want all the counties in our by-code lookup because we'd be using this table anyway for derivations
        g_county_by_code[state_code][county_num] = county_name

        # if the county name has only letters/spaces then we want it in our by-name lookup, the rest have the
        # potential to be different from the FPDS feed (and won't be used in FABS)
        if re.match(r'^[A-Z\s]+$', county_name):
            g_county_by_name[state_code][county_name] = county_num
    del county_codes

    # pull in all city codes
    city_codes = sess.query(CityCode.city_code, CityCode.state_code,
                            CityCode.county_number).all()

    for city_code in city_codes:
        g_county_by_city[city_code.state_code +
                         city_code.city_code] = city_code.county_number
    del city_codes

    # pull in all the zip codes
    start_slice = 0
    while True:
        end_slice = start_slice + ZIP_SLICE
        zip_codes = sess.query(Zips.zip5, Zips.zip_last4, Zips.state_abbreviation, Zips.county_number).\
            slice(start_slice, end_slice).all()

        for zip_data in zip_codes:
            if zip_data.zip5 not in g_zip_list:
                g_zip_list[zip_data.zip5] = {}
                g_zip_list[zip_data.zip5]['default'] = {
                    "state_abbreviation": zip_data.state_abbreviation,
                    "county_number": zip_data.county_number
                }
            g_zip_list[zip_data.zip5][zip_data.zip_last4] = {
                "state_abbreviation": zip_data.state_abbreviation,
                "county_number": zip_data.county_number
            }

        logger.info("Added %s rows to zip dict", str(end_slice))

        start_slice = end_slice

        # break the loop if we've hit the last records
        if len(zip_codes) < ZIP_SLICE:
            break
    del zip_codes

    if data_type == 'fpds':
        update_historical_fpds(sess, args.start[0], args.end[0])
    elif data_type == 'fabs':
        update_historical_fabs(sess, args.start[0], args.end[0])
    else:
        logger.error("Type must be fpds or fabs.")

    # delete all global variables just in case
    del g_country_list
    del g_state_by_code
    del g_state_code_by_fips
    del g_state_by_name
    del g_zip_list
    del g_county_by_city
    del g_county_by_code
    del g_county_by_name

    logger.info("Completed location derivations")
def validator_process_file_generation(file_gen_id, is_retry=False):
    """ Retrieves a FileGeneration object based on its ID, and kicks off a file generation. Handles errors by ensuring
        the FileGeneration (if exists) is no longer cached.

        Args:
            file_gen_id: ID of a FileGeneration object
            is_retry: True if this is not the first attempt at handling this job; when True, cleanup is performed
                      before proceeding to retry the job

        Raises:
            Any Exceptions raised by the FileGenerationManager
    """
    if is_retry:
        if cleanup_generation(file_gen_id):
            log_job_message(
                logger=logger,
                message=
                "Attempting a retry of {} after successful retry-cleanup.".
                format(inspect.stack()[0][3]),
                job_id=file_gen_id,
                is_debug=True)
        else:
            log_job_message(
                logger=logger,
                message="Retry of {} found to be not necessary after cleanup. "
                "Returning from job with success.".format(
                    inspect.stack()[0][3]),
                job_id=file_gen_id,
                is_debug=True)
            return

    sess = GlobalDB.db().session
    file_generation = None

    try:
        file_generation = sess.query(FileGeneration).filter_by(
            file_generation_id=file_gen_id).one_or_none()
        if file_generation is None:
            raise ResponseException(
                'FileGeneration ID {} not found in database'.format(
                    file_gen_id), StatusCode.CLIENT_ERROR, None)

        file_generation_manager = FileGenerationManager(
            sess, g.is_local, file_generation=file_generation)
        file_generation_manager.generate_file()

    except Exception as e:
        # Log uncaught exceptions and fail all Jobs referencing this FileGeneration
        error_data = {
            'message': 'An unhandled exception occurred in the Validator during file generation',
            'message_type': 'ValidatorInfo',
            'file_generation_id': file_gen_id,
            'traceback': traceback.format_exc()
        }
        if file_generation:
            error_data.update({
                'agency_code': file_generation.agency_code,
                'agency_type': file_generation.agency_type,
                'start_date': file_generation.start_date,
                'end_date': file_generation.end_date,
                'file_type': file_generation.file_type,
                'file_path': file_generation.file_path,
            })
        logger.error(error_data)

        # Try to mark the Jobs as failed, but continue raising the original Exception if not possible
        try:
            if file_generation:
                # Uncache the FileGeneration
                sess.refresh(file_generation)
                file_generation.is_cached_file = False

                # Mark all Jobs waiting on this FileGeneration as failed
                generation_jobs = sess.query(Job).filter_by(
                    file_generation_id=file_gen_id).all()
                for job in generation_jobs:
                    if job.job_status in [
                            JOB_STATUS_DICT['waiting'],
                            JOB_STATUS_DICT['ready'],
                            JOB_STATUS_DICT['running']
                    ]:
                        mark_job_status(job.job_id, 'failed')
                        sess.refresh(job)
                        job.file_generation_id = None
                        job.error_message = str(e)
                sess.commit()
        except:
            pass

        # ResponseExceptions only occur at very specific times, and should not affect the Validator's future attempts
        # at handling messages from SQS
        if not isinstance(e, ResponseException):
            raise e
Example #37
def start_d_generation(job,
                       start_date,
                       end_date,
                       agency_type,
                       agency_code=None,
                       file_format='csv'):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if
        its not detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
            file_format: determines if the file generated is a txt or a csv
    """
    if not (StringCleaner.is_date(start_date)
            and StringCleaner.is_date(end_date)):
        raise ResponseException(
            "Start or end date cannot be parsed into a date of format MM/DD/YYYY",
            StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type,
                                                      agency_code, file_format)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(request_date=datetime.now().date(),
                                         start_date=job.start_date,
                                         end_date=job.end_date,
                                         file_type=job.file_type.letter_name,
                                         agency_code=agency_code,
                                         agency_type=agency_type,
                                         file_format=file_format,
                                         is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({
                'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                'message_type': 'BrokerInfo',
                'file_type': job.file_type.letter_name,
                'job_id': job.job_id,
                'submission_id': job.submission_id,
                'file_generation_id': file_generation.file_generation_id
            })

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {
                "validation_type": {
                    "DataType": "String",
                    "StringValue": "generation"
                }
            }
            queue.send_message(MessageBody=str(
                file_generation.file_generation_id),
                               MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
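            # Uncache the failed FileGeneration so later requests do not reuse it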
            file_generation.is_cached_file = False
            sess.commit()
Example #38
    def max_login(self, session):
        """ Logs a user in if their password matches using MAX

            Args:
                session: Session object from flask

            Returns:
                A JsonResponse containing the user information or details on which error occurred, such as whether a
                type was wrong, something wasn't implemented, invalid keys were provided, login was denied, or a
                different, unexpected error occurred.
        """
        try:
            safe_dictionary = RequestDictionary(self.request)

            ticket = safe_dictionary.get_value("ticket")
            service = safe_dictionary.get_value('service')

            # Call MAX's serviceValidate endpoint and retrieve the response
            max_dict = get_max_dict(ticket, service)

            if 'cas:authenticationSuccess' not in max_dict[
                    'cas:serviceResponse']:
                raise ValueError(
                    "The Max CAS endpoint was unable to locate your session "
                    "using the ticket/service combination you provided.")
            cas_attrs = max_dict['cas:serviceResponse'][
                'cas:authenticationSuccess']['cas:attributes']

            # Grab MAX ID to see if a service account is being logged in
            max_id_components = cas_attrs['maxAttribute:MAX-ID'].split('_')
            service_account_flag = (len(max_id_components) > 1
                                    and max_id_components[0].lower() == 's')

            # Grab the email and list of groups from MAX's response
            email = cas_attrs['maxAttribute:Email-Address']

            try:
                sess = GlobalDB.db().session
                user = sess.query(User).filter(
                    func.lower(User.email) == func.lower(email)).one_or_none()

                # If the user does not exist, create them since they are allowed to access the site because they got
                # past the above group membership checks
                if user is None:
                    user = User()
                    user.email = email

                set_user_name(user, cas_attrs)

                set_max_perms(user, cas_attrs['maxAttribute:GroupList'],
                              service_account_flag)

                sess.add(user)
                sess.commit()

            except MultipleResultsFound:
                raise ValueError("An error occurred during login.")

            return self.create_session_and_response(session, user)

        # Catch any specifically raised errors or any other errors that may have happened and return them cleanly.
        # We add the error parameter here because this endpoint needs to provide better feedback, and to avoid changing
        # the default behavior of the JsonResponse class globally.
        except (TypeError, KeyError, NotImplementedError) as e:
            # Return a 400 with appropriate message
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR, error=str(e))
        except ValueError as e:
            # Return a 401 for login denied
            return JsonResponse.error(e,
                                      StatusCode.LOGIN_REQUIRED,
                                      error=str(e))
        except Exception as e:
            # Return 500
            return JsonResponse.error(e,
                                      StatusCode.INTERNAL_ERROR,
                                      error=str(e))
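
For reference, a minimal sketch (hypothetical values) of the nested dictionary shape that max_login reads out of the parsed MAX serviceValidate response; only the keys actually accessed above are shown:

# Illustrative shape only -- values are made up, keys mirror those used in max_login
example_max_dict = {
    'cas:serviceResponse': {
        'cas:authenticationSuccess': {
            'cas:attributes': {
                'maxAttribute:MAX-ID': 's_example_id',            # a leading 's_' marks a service account
                'maxAttribute:Email-Address': 'user@example.gov',
                'maxAttribute:GroupList': 'GROUP-A,GROUP-B'
            }
        }
    }
}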
Example #39
def perms_to_affiliations(perms, user_id, service_account_flag=False):
    """ Convert a list of perms from MAX to a list of UserAffiliations. Filter out and log any malformed perms

        Args:
            perms: list of permissions (as strings) for the user
            user_id: the ID of the user
            service_account_flag: flag to indicate a service account
        Yields:
            UserAffiliations based on the permissions provided
    """
    available_cgacs = {
        cgac.cgac_code: cgac
        for cgac in GlobalDB.db().session.query(CGAC)
    }
    available_frecs = {
        frec.frec_code: frec
        for frec in GlobalDB.db().session.query(FREC)
    }
    log_data = {'message_type': 'BrokerWarning', 'user_id': user_id}
    for perm in perms:
        log_data['message'] = 'User with ID {} has malformed permission: {}'.format(user_id, perm)
        components = perm.split('-PERM_')
        if len(components) != 2:
            logger.warning(log_data)
            continue

        codes, perm_level = components
        split_codes = codes.split('-FREC_')
        frec_code, cgac_code = None, None
        if len(split_codes) == 2:
            # permissions for FR entity code and readonly CGAC
            frec_code, cgac_code = split_codes[1], split_codes[0]
            if frec_code not in available_frecs or cgac_code not in available_cgacs:
                logger.warning(log_data)
                continue
        else:
            # permissions for CGAC
            cgac_code = codes
            if cgac_code not in available_cgacs:
                logger.warning(log_data)
                continue

        perm_level = perm_level.lower()

        if service_account_flag:
            # Replace MAX Service Account permissions with Broker "write" and "editfabs" permissions
            perm_level = 'we'
        elif perm_level not in 'rwsef':
            logger.warning(log_data)
            continue

        for permission in perm_level:
            if frec_code:
                yield UserAffiliation(
                    cgac=available_cgacs[cgac_code],
                    frec=None,
                    permission_type_id=PERMISSION_SHORT_DICT['r'])
                yield UserAffiliation(
                    cgac=None,
                    frec=available_frecs[frec_code],
                    permission_type_id=PERMISSION_SHORT_DICT[permission])
            else:
                yield UserAffiliation(
                    cgac=available_cgacs[cgac_code] if cgac_code else None,
                    frec=None,
                    permission_type_id=PERMISSION_SHORT_DICT[permission])
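
As a purely illustrative sketch (agency codes made up), this is how the two permission-string shapes handled above are split before being turned into UserAffiliations:

# Standalone illustration of the parsing in perms_to_affiliations; codes are hypothetical
for perm in ['012-PERM_R', '012-FREC_1601-PERM_W']:
    codes, perm_level = perm.split('-PERM_')
    split_codes = codes.split('-FREC_')
    if len(split_codes) == 2:
        # FREC permission: read-only access on the CGAC plus the actual permission on the FREC
        print('cgac (read-only):', split_codes[0], '| frec:', split_codes[1], '| level:', perm_level.lower())
    else:
        # Plain CGAC permission
        print('cgac:', codes, '| level:', perm_level.lower())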
Example #40
def list_submission_users(is_fabs):
    """ List user IDs and names that have submissions that the requesting user can see.

        Arguments:
            is_fabs: boolean indicating whether it is a DABS or FABS submission (True if FABS)

        Returns:
            A JsonResponse containing a list of users that have submissions that the requesting user can see
    """

    sess = GlobalDB.db().session
    # subquery to create the EXISTS portion of the query
    exists_query = sess.query(Submission).filter(
        Submission.user_id == User.user_id, Submission.is_fabs.is_(is_fabs))

    # if user is not an admin, we have to adjust the exists query to limit submissions
    if not g.user.website_admin:
        # split affiliations into frec and cgac
        cgac_affiliations = [aff for aff in g.user.affiliations if aff.cgac]
        frec_affiliations = [aff for aff in g.user.affiliations if aff.frec]

        # For DABS requests, only count the requesting user's affiliations that carry DABS-level permissions
        if not is_fabs:
            cgac_affiliations = [
                aff for aff in cgac_affiliations
                if aff.permission_type_id in DABS_PERMISSION_ID_LIST
            ]
            frec_affiliations = [
                aff for aff in frec_affiliations
                if aff.permission_type_id in DABS_PERMISSION_ID_LIST
            ]

        # Make a list of cgac and frec codes
        cgac_list = [aff.cgac.cgac_code for aff in cgac_affiliations]
        frec_list = [aff.frec.frec_code for aff in frec_affiliations]

        # Add filters where applicable
        affiliation_filters = [Submission.user_id == g.user.user_id]
        if cgac_list:
            affiliation_filters.append(Submission.cgac_code.in_(cgac_list))
        if frec_list:
            affiliation_filters.append(Submission.frec_code.in_(frec_list))

        exists_query = exists_query.filter(or_(*affiliation_filters))

    # Wrap the query in an EXISTS clause; this has to happen after the optional filters above have been applied
    exists_query = exists_query.exists()

    # Get all the relevant users
    user_results = sess.query(User.user_id, User.name,
                              User.email).filter(exists_query).order_by(
                                  User.name).all()

    # Create an array containing relevant users in a readable format
    user_list = []
    for user in user_results:
        user_list.append({
            'user_id': user[0],
            'name': user[1],
            'email': user[2]
        })

    return JsonResponse.create(StatusCode.OK, {"users": user_list})
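
Roughly, the query assembled above corresponds to SQL of the following shape (a sketch only; table and column names approximate the ORM models, and the OR'd affiliation filters only apply to non-admin users):

# SELECT user_id, name, email
# FROM users u
# WHERE EXISTS (
#     SELECT 1 FROM submission s
#     WHERE s.user_id = u.user_id
#       AND s.is_fabs IS <is_fabs>
#       AND (s.user_id = <requesting user id> OR s.cgac_code IN (...) OR s.frec_code IN (...))
# )
# ORDER BY u.name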
Example #41
    def delete(self):
        sess = GlobalDB.db().session
        sess.delete(self.sqs)
        sess.commit()
Example #42
def copy_file_generation_to_job(job, file_generation, is_local):
    """ Copy cached FileGeneration data to a Job requesting a file.

        Args:
            job: Job object to copy the data to
            file_generation: Cached FileGeneration object to copy the data from
            is_local: A boolean flag indicating whether the application is being run locally or not
    """
    sess = GlobalDB.db().session
    log_data = {
        'message': 'Copying FileGeneration {} data to Job {}'.format(file_generation.file_generation_id, job.job_id),
        'message_type': 'BrokerInfo',
        'job_id': job.job_id,
        'file_type': job.file_type.name,
        'file_generation_id': file_generation.file_generation_id
    }
    logger.info(log_data)

    # Do not edit submissions that have already successfully completed
    sess.refresh(job)
    if job.job_status_id == lookups.JOB_STATUS_DICT['finished']:
        return

    job.file_generation_id = file_generation.file_generation_id

    # File is still being generated, just mark the FileGeneration ID in the Job and wait
    # FileGeneration will update all child Jobs when it finishes
    if not file_generation.file_path:
        sess.commit()
        return

    # Generate file path for child Job's filename
    filepath = CONFIG_BROKER['broker_files'] if is_local else "{}/".format(str(job.submission_id))
    original_filename = file_generation.file_path.split('/')[-1]
    filename = '{}{}'.format(filepath, original_filename)

    # Copy parent job's data
    job.filename = filename
    job.original_filename = original_filename
    job.number_of_errors = 0
    job.number_of_warnings = 0

    # Change the validation job's file data when within a submission
    if job.submission_id is not None:
        val_job = sess.query(Job).filter(
            Job.submission_id == job.submission_id,
            Job.file_type_id == job.file_type_id, Job.job_type_id ==
            lookups.JOB_TYPE_DICT['csv_record_validation']).one()
        val_job.filename = filename
        val_job.original_filename = original_filename

        # Copy the data to the Submission's bucket
        if not is_local and file_generation.file_path != job.filename:
            # Check to see if the same file exists in the child bucket
            s3 = boto3.client('s3', region_name=CONFIG_BROKER["aws_region"])
            bucket = CONFIG_BROKER['aws_bucket']
            response = s3.list_objects_v2(Bucket=bucket, Prefix=job.filename)
            for obj in response.get('Contents', []):
                if obj['Key'] == job.filename:
                    # The file already exists in this location
                    log_data['message'] = '{} file already exists in this location: {}; not overwriting.'.\
                        format(job.file_type.name, job.filename)
                    logger.info(log_data)
                    mark_job_status(job.job_id, 'finished')
                    return

            S3Handler.copy_file(bucket, bucket, file_generation.file_path,
                                job.filename)
    sess.commit()

    # Mark Job status last so the validation job doesn't start until everything is done
    mark_job_status(job.job_id, 'finished')
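
As a quick illustration of the filename logic above (paths and filenames hypothetical), the copied file keeps the generation's original filename and is prefixed differently depending on where the broker runs:

# local run: filename = CONFIG_BROKER['broker_files'] + 'd1_20170101_20170331_awarding.csv'
# S3 run:    filename = '1234/' + 'd1_20170101_20170331_awarding.csv'   # prefixed by the submission_id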
Example #43
def main():
    now = datetime.now()
    parser = argparse.ArgumentParser(
        description='Pull data from the Federal Hierarchy API.')
    parser.add_argument('-a',
                        '--all',
                        help='Clear out the database and get historical data',
                        action='store_true')
    parser.add_argument('-f',
                        '--filename',
                        help='Generate a local CSV file from the data.',
                        nargs=1,
                        type=str)
    parser.add_argument('-o',
                        '--export_office',
                        help='Export the current office table. '
                        'Please provide the file name/path.',
                        nargs=1,
                        type=str)
    parser.add_argument('-d',
                        '--pull_date',
                        help='Date from which to start the pull',
                        nargs=1,
                        type=str)
    parser.add_argument('-i',
                        '--ignore_db',
                        help='Do not update the DB tables',
                        action='store_true')
    args = parser.parse_args()

    if args.all and args.pull_date:
        logger.error('The -a and -d flags conflict, cannot use both at once.')
        sys.exit(1)

    metrics_json = {
        'script_name': 'load_federal_hierarchy.py',
        'start_time': str(now),
        'level_1_records': 0,
        'level_2_records': 0,
        'level_3_records': 0,
        'level_4_records': 0,
        'level_5_records': 0,
        'level_6_records': 0,
        'level_7_records': 0,
        'missing_cgacs': [],
        'missing_subtier_codes': []
    }

    # Handle the pull_date parameter
    updated_date_from = None
    if args.pull_date:
        try:
            updated_date_from = args.pull_date[0]
            datetime.strptime(updated_date_from, '%Y-%m-%d')
        except ValueError:
            logger.error('The date given to the -d flag was not parseable.')
            sys.exit(1)

    # Get or create the start date
    sess = GlobalDB.db().session
    if not args.all and not updated_date_from:
        last_pull_date = sess.query(ExternalDataLoadDate.last_load_date_start).\
            filter_by(external_data_type_id=EXTERNAL_DATA_TYPE_DICT['office']).one_or_none()
        if not last_pull_date:
            logger.error(
                'The -a or -d flag must be set when there is no latest run in the database.'
            )
            sys.exit(1)
        # We want to make the date one day earlier to account for any timing weirdness between the two systems
        updated_date_from = last_pull_date[0].date() - timedelta(days=1)

    # Handle the filename parameter
    filename = args.filename[0] if args.filename else None

    # Handle the export office parameter
    export_office = args.export_office[0] if args.export_office else None

    # Handle a complete data reload
    if args.all and not args.ignore_db:
        logger.info('Emptying out the Office table for a complete reload.')
        sess.execute('''TRUNCATE TABLE office RESTART IDENTITY''')

    try:
        pull_offices(sess, filename, not args.ignore_db, args.all,
                     updated_date_from, export_office, metrics_json)
    except Exception as e:
        logger.exception(e)
        sys.exit(1)

    # find if there were any new cgacs/subtiers added
    all_cgacs = [cgac.cgac_code for cgac in sess.query(CGAC.cgac_code)]
    all_frecs = [frec.frec_code for frec in sess.query(FREC.frec_code)]
    all_subtiers = [
        subtier.sub_tier_agency_code
        for subtier in sess.query(SubTierAgency.sub_tier_agency_code)
    ]
    metrics_json['missing_cgacs'] = list(
        set(metrics_json['missing_cgacs']) - set(all_cgacs + all_frecs))
    metrics_json['missing_subtier_codes'] = list(
        set(metrics_json['missing_subtier_codes']) - set(all_subtiers))

    update_external_data_load_date(now, datetime.now(), 'office')

    metrics_json['duration'] = str(datetime.now() - now)

    with open('load_federal_hierarchy_metrics.json', 'w+') as metrics_file:
        json.dump(metrics_json, metrics_file)
    logger.info('Script complete')
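
As a rough usage sketch (assuming the script above is saved as load_federal_hierarchy.py, the name recorded in its metrics), typical invocations would look like:

# python load_federal_hierarchy.py -a                   # wipe the office table and reload everything
# python load_federal_hierarchy.py -d 2020-01-01        # incremental pull starting from the given date
# python load_federal_hierarchy.py -a -f offices.csv    # full reload plus a local CSV export
# Note: -a and -d are mutually exclusive, and -i skips the DB updates entirely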
    def run_validation(self, job):
        """ Run validations for specified job
        Args:
            job: Job to be validated
        Returns:
            True if successful
        """

        sess = GlobalDB.db().session
        error_list = ErrorInterface()
        job_id = job.job_id
        submission_id = job.submission_id

        row_number = 1
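        # row 1 is the header; data rows are counted via the increment at the top of the read loop below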
        file_type = job.file_type.name
        validation_start = datetime.now()

        log_str = 'on submission_id: {}, job_id: {}, file_type: {}'.format(
            str(submission_id), str(job_id), file_type)
        logger.info({
            'message': 'Beginning run_validation {}'.format(log_str),
            'message_type': 'ValidatorInfo',
            'submission_id': submission_id,
            'job_id': job_id,
            'file_type': file_type,
            'action': 'run_validations',
            'status': 'start',
            'start_time': validation_start
        })
        # Get orm model for this file
        model = [ft.model for ft in FILE_TYPE if ft.name == file_type][0]

        # Delete existing file level errors for this submission
        sess.query(ErrorMetadata).filter(
            ErrorMetadata.job_id == job_id).delete()
        sess.commit()

        # Clear existing records for this submission
        sess.query(model).filter_by(submission_id=submission_id).delete()
        sess.commit()

        # Clear existing flex fields for this job
        sess.query(FlexField).filter_by(job_id=job_id).delete()
        sess.commit()

        # If local, make the error report directory
        if self.is_local and not os.path.exists(self.directory):
            os.makedirs(self.directory)
        # Get bucket name and file name
        file_name = job.filename
        bucket_name = CONFIG_BROKER['aws_bucket']
        region_name = CONFIG_BROKER['aws_region']

        error_file_name = report_file_name(job.submission_id, False,
                                           job.file_type.name)
        error_file_path = "".join(
            [CONFIG_SERVICES['error_report_path'], error_file_name])
        warning_file_name = report_file_name(job.submission_id, True,
                                             job.file_type.name)
        warning_file_path = "".join(
            [CONFIG_SERVICES['error_report_path'], warning_file_name])

        # Create File Status object
        create_file_if_needed(job_id, file_name)

        reader = CsvReader()

        # Get file size and write to jobs table
        if CONFIG_BROKER["use_aws"]:
            file_size = S3Handler.get_file_size(file_name)
        else:
            file_size = os.path.getsize(file_name)
        job.file_size = file_size
        sess.commit()

        # Get fields for this file
        fields = sess.query(FileColumn).filter(
            FileColumn.file_id == FILE_TYPE_DICT[file_type]).all()

        for field in fields:
            sess.expunge(field)

        csv_schema = {row.name_short: row for row in fields}

        try:
            extension = os.path.splitext(file_name)[1]
            if not extension or extension.lower() not in ['.csv', '.txt']:
                raise ResponseException("", StatusCode.CLIENT_ERROR, None,
                                        ValidationError.fileTypeError)

            # Count file rows: throws a File Level Error for non-UTF8 characters
            temp_file = open(reader.get_filename(region_name, bucket_name,
                                                 file_name),
                             encoding='utf-8')
            file_row_count = len(list(csv.reader(temp_file)))
            try:
                temp_file.close()
            except AttributeError:
                # File does not exist, and so does not need to be closed
                pass

            # Pull file and return info on whether it's using short or long col headers
            reader.open_file(region_name,
                             bucket_name,
                             file_name,
                             fields,
                             bucket_name,
                             self.get_file_name(error_file_name),
                             self.long_to_short_dict[job.file_type_id],
                             is_local=self.is_local)

            # list to keep track of rows that fail validations
            error_rows = []

            # While not done, pull one row and put it into staging table if it passes
            # the Validator

            loading_start = datetime.now()
            logger.info({
                'message': 'Beginning data loading {}'.format(log_str),
                'message_type': 'ValidatorInfo',
                'submission_id': submission_id,
                'job_id': job_id,
                'file_type': file_type,
                'action': 'data_loading',
                'status': 'start',
                'start_time': loading_start
            })

            with open(error_file_path, 'w', newline='') as error_file,\
                    open(warning_file_path, 'w', newline='') as warning_file:
                error_csv = csv.writer(error_file,
                                       delimiter=',',
                                       quoting=csv.QUOTE_MINIMAL,
                                       lineterminator='\n')
                warning_csv = csv.writer(warning_file,
                                         delimiter=',',
                                         quoting=csv.QUOTE_MINIMAL,
                                         lineterminator='\n')

                required_list = None
                type_list = None
                if file_type == "fabs":
                    # create a list of all required/type labels for FABS
                    labels = sess.query(ValidationLabel).all()
                    required_list = {}
                    type_list = {}
                    for label in labels:
                        if label.label_type == "requirement":
                            required_list[label.column_name] = label.label
                        else:
                            type_list[label.column_name] = label.label

                # write headers to file
                error_csv.writerow(self.reportHeaders)
                warning_csv.writerow(self.reportHeaders)
                while not reader.is_finished:
                    row_number += 1

                    if row_number % 100 == 0:
                        elapsed_time = (datetime.now() -
                                        loading_start).total_seconds()
                        logger.info({
                            'message': 'Loading row: {} {}'.format(str(row_number), log_str),
                            'message_type': 'ValidatorInfo',
                            'submission_id': submission_id,
                            'job_id': job_id,
                            'file_type': file_type,
                            'action': 'data_loading',
                            'status': 'loading',
                            'rows_loaded': row_number,
                            'start_time': loading_start,
                            'elapsed_time': elapsed_time
                        })
                    #
                    # first phase of validations: read record and record a
                    # formatting error if there's a problem
                    #
                    (record, reduceRow, skip_row, doneReading, rowErrorHere, flex_cols) = \
                        self.read_record(reader, error_csv, row_number, job, fields, error_list)
                    if reduceRow:
                        row_number -= 1
                    if rowErrorHere:
                        error_rows.append(row_number)
                    if doneReading:
                        # Stop reading from input file
                        break
                    elif skip_row:
                        # Do not write this row to staging, but continue processing future rows
                        continue

                    #
                    # second phase of validations: do basic schema checks
                    # (e.g., require fields, field length, data type)
                    #
                    # D files are obtained from upstream systems (ASP and FPDS) that perform their own basic
                    # validations, so these validations are not repeated here
                    if file_type in ["award", "award_procurement"]:
                        # Skip basic validations for D files, set as valid to trigger write to staging
                        passed_validations = True
                        valid = True
                    else:
                        if file_type == "fabs":
                            record['afa_generated_unique'] = (record['award_modification_amendme'] or '-none-') + "_" +\
                                                             (record['awarding_sub_tier_agency_c'] or '-none-') + \
                                                             "_" + (record['fain'] or '-none-') + "_" + \
                                                             (record['uri'] or '-none-')
                        passed_validations, failures, valid = Validator.validate(
                            record, csv_schema, file_type == "fabs",
                            required_list, type_list)
                    if valid:
                        # todo: update this logic later when we have actual validations
                        if file_type == "fabs":
                            record["is_valid"] = True

                        model_instance = model(job_id=job_id,
                                               submission_id=submission_id,
                                               valid_record=passed_validations,
                                               **record)
                        skip_row = not insert_staging_model(
                            model_instance, job, error_csv, error_list)
                        if flex_cols:
                            sess.add_all(flex_cols)
                            sess.commit()

                        if skip_row:
                            error_rows.append(row_number)
                            continue

                    if not passed_validations:
                        fatal = write_errors(
                            failures, job,
                            self.short_to_long_dict[job.file_type_id],
                            error_csv, warning_csv, row_number, error_list,
                            flex_cols)
                        if fatal:
                            error_rows.append(row_number)

                loading_duration = (datetime.now() -
                                    loading_start).total_seconds()
                logger.info({
                    'message': 'Completed data loading {}'.format(log_str),
                    'message_type': 'ValidatorInfo',
                    'submission_id': submission_id,
                    'job_id': job_id,
                    'file_type': file_type,
                    'action': 'data_loading',
                    'status': 'finish',
                    'start_time': loading_start,
                    'end_time': datetime.now(),
                    'duration': loading_duration,
                    'total_rows': row_number
                })

                if file_type in ('appropriations', 'program_activity',
                                 'award_financial'):
                    update_tas_ids(model, submission_id)
                #
                # third phase of validations: run validation rules as specified
                # in the schema guidance. these validations are sql-based.
                #
                sql_error_rows = self.run_sql_validations(
                    job, file_type, self.short_to_long_dict[job.file_type_id],
                    error_csv, warning_csv, row_number, error_list)
                error_rows.extend(sql_error_rows)
            error_file.close()
            warning_file.close()

            # stream file to S3 when not local
            if not self.is_local:
                # stream error file
                with open(error_file_path, 'rb') as csv_file:
                    with smart_open.smart_open(S3Handler.create_file_path(self.get_file_name(error_file_name)), 'w')\
                            as writer:
                        while True:
                            chunk = csv_file.read(CHUNK_SIZE)
                            if chunk:
                                writer.write(chunk)
                            else:
                                break
                csv_file.close()
                os.remove(error_file_path)

                # stream warning file
                with open(warning_file_path, 'rb') as warning_csv_file:
                    with smart_open.smart_open(S3Handler.create_file_path(self.get_file_name(warning_file_name)), 'w')\
                            as warning_writer:
                        while True:
                            chunk = warning_csv_file.read(CHUNK_SIZE)
                            if chunk:
                                warning_writer.write(chunk)
                            else:
                                break
                warning_csv_file.close()
                os.remove(warning_file_path)

            # Calculate total number of rows in file
            # that passed validations
            error_rows_unique = set(error_rows)
            total_rows_excluding_header = row_number - 1
            valid_rows = total_rows_excluding_header - len(error_rows_unique)

            # Update fabs is_valid rows where applicable
            # Update submission to include action dates where applicable
            if file_type == "fabs":
                sess.query(DetachedAwardFinancialAssistance).\
                    filter(DetachedAwardFinancialAssistance.row_number.in_(error_rows_unique),
                           DetachedAwardFinancialAssistance.submission_id == submission_id).\
                    update({"is_valid": False}, synchronize_session=False)
                sess.commit()
                min_action_date, max_action_date = get_action_dates(
                    submission_id)
                sess.query(Submission).filter(Submission.submission_id == submission_id).\
                    update({"reporting_start_date": min_action_date, "reporting_end_date": max_action_date},
                           synchronize_session=False)

            # Ensure validated rows match initial row count
            if file_row_count != row_number:
                raise ResponseException("", StatusCode.CLIENT_ERROR, None,
                                        ValidationError.rowCountError)

            # Update job metadata
            job.number_of_rows = row_number
            job.number_of_rows_valid = valid_rows
            sess.commit()

            error_list.write_all_row_errors(job_id)
            # Update error info for submission
            populate_job_error_info(job)

            if file_type == "fabs":
                # set number of errors and warnings for detached submission
                populate_submission_error_info(submission_id)

            # Mark validation as finished in job tracker
            mark_job_status(job_id, "finished")
            mark_file_complete(job_id, file_name)
        finally:
            # Ensure the files always close
            reader.close()

            validation_duration = (datetime.now() -
                                   validation_start).total_seconds()
            logger.info({
                'message': 'Completed run_validation {}'.format(log_str),
                'message_type': 'ValidatorInfo',
                'submission_id': submission_id,
                'job_id': job_id,
                'file_type': file_type,
                'action': 'run_validation',
                'status': 'finish',
                'start_time': validation_start,
                'end_time': datetime.now(),
                'duration': validation_duration
            })

        return True
Example #45
def validator_process_job(job_id, agency_code, is_retry=False):
    """ Retrieves a Job based on its ID, and kicks off a validation. Handles errors by ensuring the Job (if exists) is
        no longer running.

        Args:
            job_id: ID of a Job
            agency_code: CGAC or FREC code for agency, only required for file generations by Job
            is_retry: Whether this is a retry of a previously attempted execution of this job. If True, cleanup is
                      performed before proceeding with the retry

        Raises:
            Any Exceptions raised by the GenerationManager or ValidationManager, excluding those explicitly handled
    """
    if is_retry:
        if cleanup_validation(job_id):
            log_job_message(
                logger=logger,
                message="Attempting a retry of {} after successful retry-cleanup.".format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
        else:
            log_job_message(
                logger=logger,
                message="Retry of {} found to be unnecessary after cleanup. "
                        "Returning from job with success.".format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
            return

    sess = GlobalDB.db().session
    job = None

    try:
        # Get the job
        job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
        if job is None:
            validation_error_type = ValidationError.jobError
            write_file_error(job_id, None, validation_error_type)
            raise ResponseException(
                'Job ID {} not found in database'.format(job_id),
                StatusCode.CLIENT_ERROR, None, validation_error_type)

        mark_job_status(job_id, 'ready')

        # We can either validate or generate a file based on Job ID
        if job.job_type.name == 'file_upload':
            # Generate A, E, or F file
            file_generation_manager = FileGenerationManager(sess,
                                                            g.is_local,
                                                            job=job)
            file_generation_manager.generate_file(agency_code)
        else:
            # Run validations
            validation_manager = ValidationManager(
                g.is_local, CONFIG_SERVICES['error_report_path'])
            validation_manager.validate_job(job.job_id)

    except (ResponseException, csv.Error, UnicodeDecodeError, ValueError) as e:
        # Handle exceptions explicitly raised during validation
        error_data = {
            'message': 'An exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }

        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name
            })
            logger.error(error_data)

            sess.refresh(job)
            job.error_message = str(e)
            if job.filename is not None:
                error_type = ValidationError.unknownError
                if isinstance(e, UnicodeDecodeError):
                    error_type = ValidationError.encodingError
                elif isinstance(e, ResponseException):
                    error_type = e.errorType

                write_file_error(job.job_id, job.filename, error_type)

            mark_job_status(job.job_id, 'invalid')
        else:
            logger.error(error_data)
            raise e

    except Exception as e:
        # Log uncaught exceptions and fail the Job
        error_data = {
            'message': 'An unhandled exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }
        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name
            })
        logger.error(error_data)

        # Try to mark the Job as failed, but continue raising the original Exception if not possible
        try:
            mark_job_status(job_id, 'failed')

            sess.refresh(job)
            job.error_message = str(e)
            sess.commit()
        except:
            pass

        raise e
    def run_cross_validation(self, job):
        """ Cross file validation job. Test all rules with matching rule_timing.
            Run each cross-file rule and create error report.

            Args:
                job: Current job
        """
        sess = GlobalDB.db().session
        job_id = job.job_id
        # Create File Status object
        create_file_if_needed(job_id)
        # Create list of errors
        error_list = ErrorInterface()

        submission_id = job.submission_id
        job_start = datetime.now()
        logger.info({
            'message': 'Beginning cross-file validations on submission_id: ' + str(submission_id),
            'message_type': 'ValidatorInfo',
            'submission_id': submission_id,
            'job_id': job.job_id,
            'action': 'run_cross_validations',
            'start': job_start,
            'status': 'start'
        })
        # Delete existing cross file errors for this submission
        sess.query(ErrorMetadata).filter(
            ErrorMetadata.job_id == job_id).delete()
        sess.commit()

        # get all cross file rules from db
        cross_file_rules = sess.query(RuleSql).filter_by(
            rule_cross_file_flag=True)

        # for each cross-file combo, run associated rules and create error report
        for c in get_cross_file_pairs():
            first_file = c[0]
            second_file = c[1]
            combo_rules = cross_file_rules.filter(
                or_(
                    and_(RuleSql.file_id == first_file.id,
                         RuleSql.target_file_id == second_file.id),
                    and_(RuleSql.file_id == second_file.id,
                         RuleSql.target_file_id == first_file.id)))

            # get error file name/path
            error_file_name = report_file_name(submission_id, False,
                                               first_file.name,
                                               second_file.name)
            error_file_path = "".join(
                [CONFIG_SERVICES['error_report_path'], error_file_name])
            warning_file_name = report_file_name(submission_id, True,
                                                 first_file.name,
                                                 second_file.name)
            warning_file_path = "".join(
                [CONFIG_SERVICES['error_report_path'], warning_file_name])

            # open error report and gather failed rules within it
            with open(error_file_path, 'w', newline='') as error_file,\
                    open(warning_file_path, 'w', newline='') as warning_file:
                error_csv = csv.writer(error_file,
                                       delimiter=',',
                                       quoting=csv.QUOTE_MINIMAL,
                                       lineterminator='\n')
                warning_csv = csv.writer(warning_file,
                                         delimiter=',',
                                         quoting=csv.QUOTE_MINIMAL,
                                         lineterminator='\n')

                # write headers to file
                error_csv.writerow(self.crossFileReportHeaders)
                warning_csv.writerow(self.crossFileReportHeaders)

                # send comboRules to validator.crossValidate sql
                current_cols_short_to_long = self.short_to_long_dict[
                    first_file.id].copy()
                current_cols_short_to_long.update(
                    self.short_to_long_dict[second_file.id].copy())
                cross_validate_sql(combo_rules.all(), submission_id,
                                   current_cols_short_to_long, first_file.id,
                                   second_file.id, job, error_csv, warning_csv,
                                   error_list, job_id)
            # close files
            error_file.close()
            warning_file.close()

            # stream file to S3 when not local
            if not self.is_local:
                # stream error file
                with open(error_file_path, 'rb') as csv_file:
                    with smart_open.smart_open(
                            S3Handler.create_file_path(
                                self.get_file_name(error_file_name)),
                            'w') as writer:
                        while True:
                            chunk = csv_file.read(CHUNK_SIZE)
                            if chunk:
                                writer.write(chunk)
                            else:
                                break
                csv_file.close()
                os.remove(error_file_path)

                # stream warning file
                with open(warning_file_path, 'rb') as warning_csv_file:
                    with smart_open.smart_open(
                            S3Handler.create_file_path(
                                self.get_file_name(warning_file_name)),
                            'w') as warning_writer:
                        while True:
                            chunk = warning_csv_file.read(CHUNK_SIZE)
                            if chunk:
                                warning_writer.write(chunk)
                            else:
                                break
                warning_csv_file.close()
                os.remove(warning_file_path)

        # write all recorded errors to database
        error_list.write_all_row_errors(job_id)
        # Update error info for submission
        populate_job_error_info(job)

        # mark job status as "finished"
        mark_job_status(job_id, "finished")
        job_duration = (datetime.now() - job_start).total_seconds()
        logger.info({
            'message': 'Completed cross-file validations on submission_id: ' + str(submission_id),
            'message_type': 'ValidatorInfo',
            'submission_id': submission_id,
            'job_id': job.job_id,
            'action': 'run_cross_validations',
            'status': 'finish',
            'start': job_start,
            'duration': job_duration
        })
        # set number of errors and warnings for submission.
        submission = populate_submission_error_info(submission_id)
        # TODO: Remove temporary step below
        # Temporarily set publishable flag at end of cross file, remove this once users are able to mark their
        # submissions as publishable
        # Publish only if no errors are present
        if submission.number_of_errors == 0:
            submission.publishable = True
        sess.commit()

        # Mark validation complete
        mark_file_complete(job_id)
Example #47
    def setUpClass(cls):
        """Set up class-wide resources (test data)"""
        super(JobTests, cls).setUpClass()
        user = cls.userId

        # Flag for testing a million+ errors (can take ~30 min to run)
        cls.includeLongTests = False

        with createApp().app_context():
            # get the submission test user
            sess = GlobalDB.db().session

            # Create test submissions and jobs, also uploading
            # the files needed for each job.
            jobDict = {}

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['file_upload'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['bad_upload'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['bad_prereq'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['external_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['wrong_type'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['finished'],
                           job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['not_ready'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile('testEmpty.csv', user),
                           job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['empty'] = job_info.job_id

            # create dependency
            dependency = JobDependency(job_id=jobDict["bad_prereq"],
                                       prerequisite_id=jobDict["bad_upload"])
            sess.add(dependency)

            colIdDict = {}
            for fileId in range(1, 5):
                for columnId in range(1, 6):
                    if columnId < 3:
                        fieldType = FIELD_TYPE_DICT['INT']
                    else:
                        fieldType = FIELD_TYPE_DICT['STRING']
                    columnName = "header_{}".format(columnId)

                    fileCol = FileColumn(
                        file_id=fileId,
                        field_types_id=fieldType,
                        name=columnName,
                        required=(columnId != FIELD_TYPE_DICT['STRING']))
                    sess.add(fileCol)
                    sess.flush()
                    colIdDict["header_{}_file_type_{}".format(
                        columnId, fileId)] = fileCol.file_column_id

            # commit submissions/jobs and output IDs
            sess.commit()
            for job_type, job_id in jobDict.items():
                print('{}: {}'.format(job_type, job_id))

            cls.jobDict = jobDict
    def setUpClass(cls):
        """ Set up class-wide resources (test data) """
        super(FABSUploadTests, cls).setUpClass()
        # TODO: refactor into a pytest fixture

        with create_app().app_context():
            # get the submission test users
            sess = GlobalDB.db().session
            cls.session = sess
            admin_user = sess.query(User).filter(
                User.email == cls.test_users['admin_user']).one()
            agency_user = sess.query(User).filter(
                User.email == cls.test_users['agency_user']).one()
            editfabs_user = sess.query(User).filter(
                User.email == cls.test_users['editfabs_user']).one()
            cls.admin_user_id = admin_user.user_id
            cls.agency_user_id = agency_user.user_id
            cls.agency_user_email = agency_user.email
            cls.editfabs_email = editfabs_user.email

            # setup submission/jobs data for test_check_status
            cls.d2_submission = cls.insert_submission(sess,
                                                      cls.admin_user_id,
                                                      cgac_code="SYS",
                                                      start_date="10/2015",
                                                      end_date="12/2015",
                                                      is_quarter=True)

            cls.d2_submission_2 = cls.insert_submission(sess,
                                                        cls.agency_user_id,
                                                        cgac_code="SYS",
                                                        start_date="10/2015",
                                                        end_date="12/2015",
                                                        is_quarter=True)

            cls.published_submission = cls.insert_submission(
                sess,
                cls.admin_user_id,
                cgac_code="SYS",
                start_date="10/2015",
                end_date="12/2015",
                is_quarter=True,
                publish_status_id=PUBLISH_STATUS_DICT["published"])

            cls.other_submission = cls.insert_submission(sess,
                                                         cls.admin_user_id,
                                                         cgac_code="SYS",
                                                         start_date="07/2015",
                                                         end_date="09/2015",
                                                         is_quarter=True,
                                                         d2_submission=False)

            cls.test_agency_user_submission_id = cls.insert_submission(
                sess,
                cls.agency_user_id,
                cgac_code="NOT",
                start_date="10/2015",
                end_date="12/2015",
                is_quarter=True,
                d2_submission=True)
            cls.insert_agency_user_submission_data(
                sess, cls.test_agency_user_submission_id)
def updateLastLogin(user, unlock_user=False):
    """ This updates the last login date to today's datetime for the user to the current date upon successful login.
    """
    sess = GlobalDB.db().session
    user.last_login_date = time.strftime("%c") if not unlock_user else None
    sess.commit()
def load_sf133(filename, fiscal_year, fiscal_period, force_load=False):
    """Load SF 133 (budget execution report) lookup table."""

    with createApp().app_context():
        sess = GlobalDB.db().session

        existing_records = sess.query(SF133).filter(
            SF133.fiscal_year == fiscal_year, SF133.period == fiscal_period)
        if force_load:
            # force a reload of this period's current data
            logger.info('Force SF 133 load: deleting existing records for {} {}'.format(
                fiscal_year, fiscal_period))
            delete_count = existing_records.delete()
            logger.info('{} records deleted'.format(delete_count))
        elif existing_records.count():
            # if there's existing data & we're not forcing a load, skip
            logger.info('SF133 {} {} already in database ({} records). Skipping file.'.format(
                fiscal_year, fiscal_period, existing_records.count()))
            return

        data = pd.read_csv(filename, dtype=str)
        data = LoaderUtils.cleanData(
            data,
            SF133,
            {"ata": "allocation_transfer_agency",
             "aid": "agency_identifier",
             "availability_type_code": "availability_type_code",
             "bpoa": "beginning_period_of_availa",
             "epoa": "ending_period_of_availabil",
             "main_account": "main_account_code",
             "sub_account": "sub_account_code",
             "fiscal_year": "fiscal_year",
             "period": "period",
             "line_num": "line",
             "amount_summed":
            "amount"},
            {"allocation_transfer_agency": {"pad_to_length": 3},
             "agency_identifier": {"pad_to_length": 3},
             "main_account_code": {"pad_to_length": 4},
             "sub_account_code": {"pad_to_length": 3},
             # next 3 lines handle the TAS fields that shouldn't
             # be padded but should still be empty spaces rather
             # than NULLs. this ensures that the downstream pivot & melt
             # (which insert the missing 0-value SF-133 lines)
             # will work as expected (values used in the pivot
             # index cannot be NULL).
             # the "pad_to_length: 0" works around the fact
             # that sometimes the incoming data for these columns
             # is a single space and sometimes it is blank/NULL.
             "beginning_period_of_availa": {"pad_to_length": 0},
             "ending_period_of_availabil": {"pad_to_length": 0},
             "availability_type_code": {"pad_to_length": 0},
             "amount": {"strip_commas": True}}
        )

        # todo: find out how to handle dup rows (e.g., same tas/period/line number)
        # line numbers 2002 and 2102 are the only duped SF 133 report line numbers,
        # and they are not used by the validation rules, so for now
        # just remove them before loading our SF-133 table
        dupe_line_numbers = ['2002', '2102']
        data = data[~data.line.isin(dupe_line_numbers)]

        # add concatenated TAS field for internal use (i.e., joining to staging tables)
        data['tas'] = data.apply(lambda row: format_internal_tas(row), axis=1)

        # incoming .csv does not always include rows for zero-value SF-133 lines
        # so we add those here because they're needed for the SF-133 validations.
        # 1. "pivot" the sf-133 dataset to explode it horizontally, creating one
        # row for each tas/fiscal year/period, with columns for each SF-133 line.
        # the "fill_value=0" parameter puts a 0 into any Sf-133 line number cell
        # with a missing value for a specific tas/fiscal year/period.
        # 2. Once the zeroes are filled in, "melt" the pivoted data back to its normal
        # format of one row per tas/fiscal year/period.
        # NOTE: fields used for the pivot in step #1 (i.e., items in pivot_idx) cannot
        # have NULL values, else they will be silently dropped by pandas :(
        pivot_idx = ['created_at', 'updated_at', 'agency_identifier', 'allocation_transfer_agency',
                     'availability_type_code', 'beginning_period_of_availa', 'ending_period_of_availabil',
                     'main_account_code', 'sub_account_code', 'tas', 'fiscal_year', 'period']
        data.amount = data.amount.astype(float)
        data = pd.pivot_table(data, values='amount', index=pivot_idx, columns=['line'], fill_value=0).reset_index()
        data = pd.melt(data, id_vars=pivot_idx, value_name='amount')

        # Now that we've added zero lines for EVERY tas and SF 133 line number, get rid of the ones
        # we don't actually use in the validations. Arguably, it would be better just to include
        # everything, but that drastically increases the number of records we're inserting to the
        # sf_133 table. If we ever decide that we need *all* SF 133 lines that are zero value,
        # remove the next two lines.
        sf_133_validation_lines = [
            '1000', '1010', '1011', '1012', '1013', '1020', '1021', '1022',
            '1023', '1024', '1025', '1026', '1029', '1030', '1031', '1032',
            '1033', '1040', '1041', '1042', '1160', '1180', '1260', '1280',
            '1340', '1440', '1540', '1640', '1750', '1850', '1910', '2190',
            '2490', '2500', '3020', '4801', '4802', '4881', '4882', '4901',
            '4902', '4908', '4981', '4982'
        ]
        data = data[(data.line.isin(sf_133_validation_lines)) | (data.amount != 0)]

        # we didn't use the 'keep_null' option when padding allocation transfer agency,
        # because nulls in that column break the pivot (see above comments).
        # so, replace the ata '000' with an empty value before inserting to db
        data['allocation_transfer_agency'] = data['allocation_transfer_agency'].str.replace('000', '')
        # make a pass through the dataframe, changing any empty values to None, to ensure
        # that those are represented as NULL in the db.
        data = data.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)

        # insert to db
        table_name = SF133.__table__.name
        num = LoaderUtils.insertDataframe(data, table_name, sess.connection())
        sess.commit()

    logger.info('{} records inserted to {}'.format(num, table_name))
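# The pivot/melt zero-fill step above is the least obvious part of load_sf133, so here is a minimal,
# self-contained sketch of the same trick on toy data (the column names below are illustrative only,
# not the real SF-133 schema). pivot_table(fill_value=0) materializes a cell for every tas/line
# combination, and melt() folds the widened frame back to one row per tas/line, now including the
# zero-value rows that were missing from the input.
import pandas as pd


def _demo_sf133_zero_fill():
    toy = pd.DataFrame({
        'tas': ['A', 'A', 'B'],
        'line': ['1000', '2500', '1000'],
        'amount': [10.0, 20.0, 30.0],
    })
    pivot_idx = ['tas']
    # widen: one row per tas, one column per line, missing cells filled with 0
    wide = pd.pivot_table(toy, values='amount', index=pivot_idx, columns=['line'], fill_value=0).reset_index()
    # narrow: back to one row per tas/line; ('B', '2500') now appears with amount 0.0
    return pd.melt(wide, id_vars=pivot_idx, value_name='amount')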
def create_jobs(upload_files, submission, existing_submission=False):
    """Create the set of jobs associated with the specified submission

    Arguments:
    upload_files -- list of named tuples that describe files uploaded to the broker
    submission -- submission
    existing_submission -- true if we should update jobs in an existing submission rather than creating new jobs

    Returns:
    Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    sess = GlobalDB.db().session
    submission_id = submission.submission_id

    # create the file upload and single-file validation jobs and
    # set up the dependencies between them
    # before starting, sort the incoming list of jobs by letter
    # to ensure that jobs dependent on the awards jobs being present
    # are processed last.
    jobs_required = []
    upload_dict = {}
    sorted_uploads = sorted(upload_files, key=attrgetter('file_letter'))

    for upload_file in sorted_uploads:
        validation_job_id, upload_job_id = add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission)
        if validation_job_id:
            jobs_required.append(validation_job_id)
        upload_dict[upload_file.file_type] = upload_job_id

    # once single-file upload/validation jobs are created, create the cross-file
    # validation job and dependencies
    # todo: remove external validation jobs from the code-base--they aren't used
    if existing_submission:
        # find cross-file and external validation jobs and mark them as waiting
        # (note: job_type of 'validation' is a cross-file job)
        val_job = sess.query(Job).\
            filter_by(submission_id=submission_id, job_type_id=JOB_TYPE_DICT["validation"]).\
            one()
        val_job.job_status_id = JOB_STATUS_DICT["waiting"]
        ext_job = sess.query(Job).\
            filter_by(submission_id=submission_id, job_type_id=JOB_TYPE_DICT["external_validation"]).\
            one()
        ext_job.job_status_id = JOB_STATUS_DICT["waiting"]
        submission.updated_at = time.strftime("%c")
    else:
        # create cross-file validation job
        validation_job = Job(
            job_status_id=JOB_STATUS_DICT["waiting"],
            job_type_id=JOB_TYPE_DICT["validation"],
            submission_id=submission_id)
        sess.add(validation_job)
        # create external validation job
        external_job = Job(
            job_status_id=JOB_STATUS_DICT["waiting"],
            job_type_id=JOB_TYPE_DICT["external_validation"],
            submission_id=submission_id)
        sess.add(external_job)
        sess.flush()
        # create dependencies for validation jobs
        for job_id in jobs_required:
            val_dependency = JobDependency(job_id=validation_job.job_id, prerequisite_id=job_id)
            sess.add(val_dependency)
            ext_dependency = JobDependency(job_id=external_job.job_id, prerequisite_id=job_id)
            sess.add(ext_dependency)

    sess.commit()
    upload_dict["submission_id"] = submission_id
    return upload_dict
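# A small illustration of the "sort uploads by letter" step in create_jobs. The UploadFileExample
# tuple below is hypothetical (the real named tuple comes from the broker's file-handling code); the
# point is just that sorting on file_letter puts the A/B/C uploads ahead of the award (D) files, so
# jobs that depend on the award jobs being present are wired up last.
from collections import namedtuple
from operator import attrgetter

UploadFileExample = namedtuple('UploadFileExample', ['file_type', 'file_letter', 'file_name'])

_uploads = [
    UploadFileExample('award', 'D2', 'd2_awards.csv'),
    UploadFileExample('appropriations', 'A', 'appropriations.csv'),
    UploadFileExample('award_financial', 'C', 'award_financial.csv'),
]
# -> ordered by letters A, C, D2: prerequisite uploads first, award-dependent uploads last
_sorted_uploads = sorted(_uploads, key=attrgetter('file_letter'))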
def certify_dabs_submission(submission, file_manager):
    """ Certify a DABS submission

        Args:
            submission: the submission to be certified
            file_manager: a FileHandler object to be used to call move_certified_files

        Returns:
            Nothing if successful, JsonResponse error containing the details of the error if something went wrong
    """
    current_user_id = g.user.user_id

    if not submission.publishable:
        return JsonResponse.error(
            ValueError(
                "Submission cannot be certified due to critical errors"),
            StatusCode.CLIENT_ERROR)

    if not submission.is_quarter_format:
        return JsonResponse.error(
            ValueError("Monthly submissions cannot be certified"),
            StatusCode.CLIENT_ERROR)

    if submission.publish_status_id == PUBLISH_STATUS_DICT['published']:
        return JsonResponse.error(
            ValueError("Submission has already been certified"),
            StatusCode.CLIENT_ERROR)

    windows = get_windows()
    for window in windows:
        if window.block_certification:
            return JsonResponse.error(ValueError(window.message),
                                      StatusCode.CLIENT_ERROR)

    response = find_existing_submissions_in_period(
        submission.cgac_code, submission.frec_code,
        submission.reporting_fiscal_year, submission.reporting_fiscal_period,
        submission.submission_id)

    if response.status_code == StatusCode.OK:
        sess = GlobalDB.db().session

        # create the certify_history entry
        certify_history = CertifyHistory(
            created_at=datetime.utcnow(),
            user_id=current_user_id,
            submission_id=submission.submission_id)
        sess.add(certify_history)
        sess.commit()

        # get the certify_history entry including the PK
        certify_history = sess.query(CertifyHistory).filter_by(submission_id=submission.submission_id).\
            order_by(CertifyHistory.created_at.desc()).first()

        # move files (locally we don't move but we still need to populate the certified_files_history table)
        file_manager.move_certified_files(submission, certify_history,
                                          file_manager.is_local)

        # set submission contents
        submission.certifying_user_id = current_user_id
        submission.publish_status_id = PUBLISH_STATUS_DICT['published']
        sess.commit()
        print(submission.__dict__)
def start_generation_job(job, start_date, end_date, agency_code=None):
    """ Validates the dates for a D file generation job and passes the Job ID to SQS

        Args:
            job: File generation job to start
            start_date: Start date of the file generation
            end_date: End date of the file generation
            agency_code: Agency code for detached D file generations

        Returns:
            Tuple of boolean indicating successful start, and error response if False
    """
    sess = GlobalDB.db().session
    file_type = job.file_type.letter_name
    try:
        if file_type in ['D1', 'D2']:
            # Validate and set Job's start and end dates
            if not (StringCleaner.is_date(start_date)
                    and StringCleaner.is_date(end_date)):
                raise ResponseException(
                    "Start or end date cannot be parsed into a date",
                    StatusCode.CLIENT_ERROR)
            job.start_date = start_date
            job.end_date = end_date
            sess.commit()
        elif file_type not in ["E", "F"]:
            raise ResponseException("File type must be either D1, D2, E or F",
                                    StatusCode.CLIENT_ERROR)

    except ResponseException as e:
        return False, JsonResponse.error(e,
                                         e.status,
                                         file_type=file_type,
                                         status='failed')

    mark_job_status(job.job_id, "waiting")

    # Add job_id to the SQS job queue
    logger.info({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Sending file generation job {} to Validator in SQS'.format(job.job_id)
    })
    queue = sqs_queue()

    message_attr = {
        'agency_code': {
            'DataType': 'String',
            'StringValue': agency_code
        }
    } if agency_code else {}
    response = queue.send_message(MessageBody=str(job.job_id),
                                  MessageAttributes=message_attr)
    logger.debug({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Send message response: {}'.format(response)
    })

    return True, None
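# For context, a hedged sketch of what a worker on the other side of the queue might do to pick up
# the message sent by start_generation_job. The queue name, wait time, and delete-on-success behavior
# here are assumptions for illustration; only the message body/attribute shapes mirror the code above.
import boto3


def _demo_poll_generation_queue(queue_name='validator-job-queue'):
    sqs = boto3.resource('sqs')
    queue = sqs.get_queue_by_name(QueueName=queue_name)  # hypothetical queue name
    for message in queue.receive_messages(MessageAttributeNames=['agency_code'],
                                          MaxNumberOfMessages=1, WaitTimeSeconds=10):
        job_id = int(message.body)  # start_generation_job sends str(job.job_id) as the body
        attrs = message.message_attributes or {}
        agency_code = attrs.get('agency_code', {}).get('StringValue')
        print('Would start generation for job {} (agency_code={})'.format(job_id, agency_code))
        message.delete()  # remove the message once handled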
def check_current_page(submission):
    """ Determine which page of the submission process the given submission is currently on. """
    sess = GlobalDB.db().session

    submission_id = submission.submission_id

    # /v1/uploadDetachedFiles/
    # DetachedFiles
    if submission.d2_submission:
        data = {
            "message": "The current progress of this submission ID is on /v1/uploadDetachedFiles/ page.",
            "step": "6"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/reviewData/
    # Checks that both E and F files are finished
    review_data = sess.query(Job).filter(Job.submission_id == submission_id,
                                         Job.file_type_id.in_([6, 7]), Job.job_status_id == 4)

    # Need to check that cross file is done as well
    generate_ef = sess.query(Job).filter(Job.submission_id == submission_id, Job.job_type_id == 4,
                                         Job.number_of_errors == 0, Job.job_status_id == 4)

    if review_data.count() == 2 and generate_ef.count() > 0:
        data = {
            "message": "The current progress of this submission ID is on /v1/reviewData/ page.",
            "step": "5"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/generateEF/
    if generate_ef.count() > 0:
        data = {
            "message": "The current progress of this submission ID is on /v1/generateEF/ page.",
            "step": "4"
        }
        return JsonResponse.create(StatusCode.OK, data)

    validate_cross_file = sess.query(Job).filter(Job.submission_id == submission_id,
                                                 Job.file_type_id.in_([4, 5]), Job.job_type_id == 2,
                                                 Job.number_of_errors == 0, Job.file_size.isnot(None),
                                                 Job.job_status_id == 4)

    generate_files = sess.query(Job).filter(Job.submission_id == submission_id,
                                            Job.file_type_id.in_([1, 2, 3]), Job.job_type_id == 2,
                                            Job.number_of_errors == 0, Job.file_size.isnot(None),
                                            Job.job_status_id == 4)

    # /v1/validateCrossFile/
    if validate_cross_file.count() == 2 and generate_files.count() == 3:
        data = {
            "message": "The current progress of this submission ID is on /v1/validateCrossFile/ page.",
            "step": "3"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/generateFiles/
    if generate_files.count() == 3:
        data = {
            "message": "The current progress of this submission ID is on /v1/generateFiles/ page.",
            "step": "2"
        }
        return JsonResponse.create(StatusCode.OK, data)

    # /v1/validateData/
    validate_data = sess.query(Job).filter(Job.submission_id == submission_id,
                                           Job.file_type_id.in_([1, 2, 3]), Job.job_type_id == 2,
                                           Job.number_of_errors != 0, Job.file_size.isnot(None))
    check_header_errors = sess.query(Job).filter(Job.submission_id == submission_id,
                                                 Job.file_type_id.in_([1, 2, 3]), Job.job_type_id == 2,
                                                 Job.job_status_id != 4, Job.file_size.isnot(None))
    if validate_data.count() > 0 or check_header_errors.count() > 0:
        data = {
            "message": "The current progress of this submission ID is on /v1/validateData/ page.",
            "step": "1"
        }
        return JsonResponse.create(StatusCode.OK, data)

    return JsonResponse.error(ValueError("The submission ID returns no response"), StatusCode.CLIENT_ERROR)
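# The page checks in check_current_page are effectively "walk from the latest step backwards and
# return the first page whose jobs are in the right state". A compact, hypothetical restatement of
# that pattern follows; the predicates are stand-ins for the real Job-count queries, not broker API.
def _first_matching_page(checks):
    """ checks: ordered iterable of (predicate, step, page) tuples; returns the first match or None. """
    for predicate, step, page in checks:
        if predicate():
            return {'message': 'The current progress of this submission ID is on {} page.'.format(page),
                    'step': step}
    return None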
def setupJobTrackerDB():
    """Create job tracker tables from model metadata."""
    with createApp().app_context():
        sess = GlobalDB.db().session
        insertCodes(sess)
        sess.commit()
def setupValidationDB():
    """Create validation tables from model metadata and do initial inserts."""
    with createApp().app_context():
        sess = GlobalDB.db().session
        insertCodes(sess)
        sess.commit()
def purge():
    sess = GlobalDB.db().session
    sess.query(SQS).delete()
    sess.commit()
def load_program_activity_data(base_path, force_reload=False, export=False):
    """ Load program activity lookup table.

        Args:
            base_path: directory of domain config files
            force_reload: whether or not to force a reload
            export: whether or not to export a public copy of the file
    """
    now = datetime.datetime.now()
    metrics_json = {
        'script_name': 'load_program_activity.py',
        'start_time': str(now),
        'records_received': 0,
        'duplicates_dropped': 0,
        'invalid_records_dropped': 0,
        'records_deleted': 0,
        'records_inserted': 0
    }
    dropped_count = 0

    logger.info('Checking PA upload dates to see if we can skip.')
    last_upload = get_date_of_current_pa_upload(base_path)
    if not (last_upload > get_stored_pa_last_upload()) and not force_reload:
        logger.info('Skipping load as it\'s already been done')
    else:
        logger.info('Getting the program activity file')
        program_activity_file = get_program_activity_file(base_path)

        logger.info('Loading program activity: {}'.format(PA_FILE_NAME))

        with create_app().app_context():
            sess = GlobalDB.db().session
            try:
                raw_data = pd.read_csv(program_activity_file, dtype=str)
            except pd.io.common.EmptyDataError:
                log_blank_file()
                exit_if_nonlocal(4)  # exit code chosen arbitrarily, to indicate distinct failure states
                return
            headers = set([header.upper() for header in list(raw_data)])

            if not VALID_HEADERS.issubset(headers):
                logger.error('Missing required headers. Required headers include: %s' % str(VALID_HEADERS))
                exit_if_nonlocal(4)
                return

            try:
                dropped_count, data = clean_data(
                    raw_data,
                    ProgramActivity,
                    {'fyq': 'fiscal_year_period', 'agency_code': 'agency_id', 'allocation_id': 'allocation_transfer_id',
                     'account_code': 'account_number', 'pa_code': 'program_activity_code',
                     'pa_title': 'program_activity_name'},
                    {'program_activity_code': {'pad_to_length': 4}, 'agency_id': {'pad_to_length': 3},
                     'allocation_transfer_id': {'pad_to_length': 3, 'keep_null': True},
                     'account_number': {'pad_to_length': 4}},
                    ['agency_id', 'program_activity_code', 'account_number', 'program_activity_name'],
                    True
                )
            except FailureThresholdExceededException as e:
                if e.count == 0:
                    log_blank_file()
                    exit_if_nonlocal(4)
                    return
                else:
                    logger.error('Loading of program activity file failed due to exceeded failure threshold. '
                                 'Application tried to drop {} rows'.format(e.count))
                    exit_if_nonlocal(5)
                    return

            metrics_json['records_deleted'] = sess.query(ProgramActivity).delete()
            metrics_json['invalid_records_dropped'] = dropped_count

            # Lowercase Program Activity Name
            data['program_activity_name'] = data['program_activity_name'].apply(lambda x: lowercase_or_notify(x))
            # Convert FYQ to FYP
            data['fiscal_year_period'] = data['fiscal_year_period'].apply(lambda x: convert_fyq_to_fyp(x))

            # because we're only loading a subset of program activity info, there will be duplicate records in the
            # dataframe. this is ok, but they need to be de-duped before the db load. We also need to log them.
            base_count = len(data.index)
            metrics_json['records_received'] = base_count
            data.drop_duplicates(inplace=True)

            dupe_count = base_count - len(data.index)
            logger.info('Dropped {} duplicate rows.'.format(dupe_count))
            metrics_json['duplicates_dropped'] = dupe_count

            # insert to db
            table_name = ProgramActivity.__table__.name
            num = insert_dataframe(data, table_name, sess.connection())
            sess.commit()

            if export:
                export_public_pa(raw_data)

        end_time = datetime.datetime.now()
        update_external_data_load_date(now, end_time, 'program_activity')
        update_external_data_load_date(last_upload, end_time, 'program_activity_upload')
        logger.info('{} records inserted to {}'.format(num, table_name))
        metrics_json['records_inserted'] = num

        metrics_json['duration'] = str(end_time - now)

    with open('load_program_activity_metrics.json', 'w+') as metrics_file:
        json.dump(metrics_json, metrics_file)

    if dropped_count > 0:
        exit_if_nonlocal(3)
        return
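# A quick sketch of the duplicate-accounting step in load_program_activity_data: the dropped count is
# simply the row count before drop_duplicates() minus the row count after. The frame below is toy
# data, not the real program activity layout.
import pandas as pd


def _demo_duplicate_metrics():
    toy = pd.DataFrame({'agency_id': ['020', '020', '020'],
                        'program_activity_code': ['0001', '0001', '0002']})
    base_count = len(toy.index)
    deduped = toy.drop_duplicates()
    # here records_received is 3 and duplicates_dropped is 1
    return {'records_received': base_count, 'duplicates_dropped': base_count - len(deduped.index)}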

if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()

    historic = args.historic
    local = args.local
    monthly = args.monthly
    daily = args.daily
    benchmarks = args.benchmarks
    update = args.update

    with create_app().app_context():
        configure_logging()
        sess = GlobalDB.db().session

        if monthly and daily:
            logger.error("For loading a single local file, you must provide either monthly or daily.")
            sys.exit(1)
        if historic and update:
            logger.error("For multiple file loads you must choose either historic or update.")
            sys.exit(1)
        elif (monthly or daily) and local:
            logger.error("Local directory specified with a local file.")
            sys.exit(1)
        elif monthly:
            parse_sam_file(monthly, sess=sess, monthly=True, benchmarks=benchmarks)
        elif daily:
            parse_sam_file(daily, sess=sess, benchmarks=benchmarks)
        else:
def setup_submission_type_db():
    """Create job tracker tables from model metadata."""
    with create_app().app_context():
        sess = GlobalDB.db().session
        insert_codes(sess)
        sess.commit()