def retrieve_cached_file_generation(job, agency_type, agency_code):
    """ Retrieves a cached FileGeneration for the D file request, if there is one.

        Args:
            job: Upload Job for the generation file
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code to generate file for

        Returns:
            FileGeneration object matching the criteria, or None
    """
    sess = GlobalDB.db().session
    logger.info({'message': 'Checking for a cached FileGeneration to pull file from', 'message_type': 'BrokerInfo',
                 'submission_id': job.submission_id, 'job_id': job.job_id, 'file_type': job.file_type.letter_name})

    # find current date and date of last FPDS pull
    current_date = datetime.now().date()
    last_update = sess.query(FPDSUpdate).one_or_none()
    fpds_date = last_update.update_date if last_update else current_date

    # check if a cached FileGeneration already exists using these criteria
    file_generation = None
    file_gen = sess.query(FileGeneration).filter(
        FileGeneration.start_date == job.start_date, FileGeneration.end_date == job.end_date,
        FileGeneration.agency_code == agency_code,  FileGeneration.agency_type == agency_type,
        FileGeneration.file_type == job.file_type.letter_name, FileGeneration.is_cached_file.is_(True)).one_or_none()

    if file_gen and (file_gen.file_type == 'D1' and file_gen.request_date < fpds_date):
        # Uncache expired D1 FileGeneration
        file_gen.is_cached_file = False
        sess.commit()
    elif file_gen:
        file_generation = file_gen

    return file_generation
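# Hedged usage sketch (not part of the original source): how a caller might check the cache before
# starting a fresh D-file generation. `copy_file_generation_to_job` and `start_d_generation` are
# assumed helpers from the generation module, named here for illustration only.
def _example_reuse_cached_generation(job):
    """ Illustrative only: reuse a cached FileGeneration when one exists, otherwise regenerate. """
    file_gen = retrieve_cached_file_generation(job, agency_type='awarding', agency_code='012')
    if file_gen:
        copy_file_generation_to_job(job, file_gen)  # assumed helper: point the job at the cached file
    else:
        start_d_generation(job, job.start_date, job.end_date, 'awarding')  # assumed helper: new generation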
def check_generation_prereqs(submission_id, file_type):
    """ Make sure the prerequisite jobs for this file type are complete without errors.

        Args:
            submission_id: the submission id for which we're checking file generation prerequisites
            file_type: the type of file being generated

        Returns:
            A boolean indicating if the job has no incomplete prerequisites (True if the job is clear to start)
    """
    sess = GlobalDB.db().session
    prereq_query = sess.query(Job).filter(Job.submission_id == submission_id,
                                          or_(Job.job_status_id != lookups.JOB_STATUS_DICT['finished'],
                                              Job.number_of_errors > 0))

    # Check cross-file validation if generating E or F
    if file_type in ['E', 'F']:
        unfinished_prereqs = prereq_query.filter(Job.job_type_id == lookups.JOB_TYPE_DICT['validation']).count()
    # Check A, B, C files if generating a D file
    elif file_type in ['D1', 'D2']:
        unfinished_prereqs = prereq_query.filter(Job.file_type_id.in_(
            [lookups.FILE_TYPE_DICT['appropriations'], lookups.FILE_TYPE_DICT['program_activity'],
             lookups.FILE_TYPE_DICT['award_financial']])).count()
    else:
        raise ResponseException('Invalid type for file generation', StatusCode.CLIENT_ERROR)

    return unfinished_prereqs == 0
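# Hedged usage sketch (not part of the original source): gate a D1 generation on its prerequisite
# A/B/C validation jobs before queueing any work, raising the same client error the API layer uses.
def _example_gate_d1_generation(submission_id):
    """ Illustrative only: refuse to start D1 generation while prerequisites are unfinished. """
    if not check_generation_prereqs(submission_id, 'D1'):
        raise ResponseException('Must wait for completion of prerequisite validation job',
                                StatusCode.CLIENT_ERROR)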
def create_generation_job(file_type_name, start_date, end_date):
    """ Add details to jobs for generating files

        Args:
            file_type_name: the name of the file type being generated
            start_date: The start date for the generation job, only used for detached files
            end_date: The end date for the generation job, only used for detached files

        Returns:
            the file generation job
    """
    sess = GlobalDB.db().session

    # Create a new job for a detached generation
    job = Job(job_type_id=lookups.JOB_TYPE_DICT['file_upload'], user_id=g.user.user_id,
              file_type_id=lookups.FILE_TYPE_DICT[file_type_name], start_date=start_date, end_date=end_date)
    sess.add(job)

    # Update the job details
    job.message = None
    job.job_status_id = lookups.JOB_STATUS_DICT["ready"]
    sess.commit()
    sess.refresh(job)

    return job
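# Hedged usage sketch (not part of the original source): create a detached D1 generation job and
# hand it to start_d_generation, mirroring the call shown later in generate_file; the import of
# start_d_generation and the agency handling for detached runs are assumptions here.
def _example_detached_d1_job():
    """ Illustrative only: a detached D1 generation job for 01/01/2017 - 03/31/2017. """
    job = create_generation_job('award_procurement', '01/01/2017', '03/31/2017')
    start_d_generation(job, '01/01/2017', '03/31/2017', 'awarding', file_format='csv')
    return job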
    def set_skip_guide(self):
        """ Set current user's skip guide parameter

            Returns:
                JsonResponse object containing results of setting the skip guide or details of the error that occurred.
                Possible errors include the request not containing a skip_guide parameter or it not being a boolean
                value
        """
        sess = GlobalDB.db().session
        request_dict = RequestDictionary.derive(self.request)
        try:
            if 'skip_guide' not in request_dict:
                raise ResponseException(
                    "Must include skip_guide parameter",
                    StatusCode.CLIENT_ERROR
                )
            skip_guide = str(request_dict['skip_guide']).lower()
            if skip_guide not in ("true", "false"):
                raise ResponseException(
                    "skip_guide must be true or false",
                    StatusCode.CLIENT_ERROR
                )
            g.user.skip_guide = skip_guide == "true"
        except ResponseException as exc:
            return JsonResponse.error(exc, exc.status)
        sess.commit()
        return JsonResponse.create(StatusCode.OK, {"message": "skip_guide set successfully", "skip_guide": skip_guide})
def main():
    sess = GlobalDB.db().session

    # delete any data in the PublishedAwardFinancialAssistance table
    logger.info('deleting PublishedAwardFinancialAssistance data')
    sess.query(PublishedAwardFinancialAssistance).delete(synchronize_session=False)
    sess.commit()

    if CONFIG_BROKER["use_aws"]:
        s3connection = boto.s3.connect_to_region(CONFIG_BROKER['aws_region'])
        s3bucket = s3connection.lookup(CONFIG_BROKER['archive_bucket'])
        for key in s3bucket.list():
            if re.match(r'^\d{4}_All_(DirectPayments|Grants|Insurance|Loans|Other)_Full_\d{8}\.csv\.zip', key.name):
                file_path = key.generate_url(expires_in=600)
                parse_fabs_file(urllib.request.urlopen(file_path), sess)
    else:
        base_path = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "fabs")
        file_list = [f for f in os.listdir(base_path)]
        for file in file_list:
            if re.match(r'^\d{4}_All_(DirectPayments|Grants|Insurance|Loans|Other)_Full_\d{8}\.csv\.zip', file):
                parse_fabs_file(open(os.path.join(base_path, file)), sess)

    set_active_rows(sess)

    logger.info("Historical FABS script complete")
def read_zips():
    with create_app().app_context():
        sess = GlobalDB.db().session

        # delete old values in case something changed and one is now invalid
        sess.query(Zips).delete(synchronize_session=False)
        sess.commit()

        if CONFIG_BROKER["use_aws"]:
            s3connection = boto.s3.connect_to_region(CONFIG_BROKER['aws_region'])
            s3bucket = s3connection.lookup(CONFIG_BROKER['sf_133_bucket'])
            zip_folder = CONFIG_BROKER["zip_folder"] + "/"
            for key in s3bucket.list(prefix=zip_folder):
                if key.name != zip_folder:
                    zip_4_file_path = key.generate_url(expires_in=600)
                    parse_zip4_file(urllib.request.urlopen(zip_4_file_path), sess)

            # parse remaining 5 digit zips that weren't in the first file
            citystate_file = s3bucket.get_key("ctystate.txt").generate_url(expires_in=600)
            parse_citystate_file(urllib.request.urlopen(citystate_file), sess)
        else:
            base_path = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", CONFIG_BROKER["zip_folder"])
            # creating the list while ignoring hidden files on mac
            file_list = [f for f in os.listdir(base_path) if not re.match(r'^\.', f)]
            for file in file_list:
                parse_zip4_file(open(os.path.join(base_path, file)), sess)

            # parse remaining 5 digit zips that weren't in the first file
            citystate_file = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "ctystate.txt")
            parse_citystate_file(open(citystate_file), sess)

        logger.info("Zipcode script complete")
def load_frec(file_name):
    """ Load FREC (high-level agency names) lookup table.

        Args:
            file_name: path/url to the file to be read
    """
    sess = GlobalDB.db().session
    models = {frec.frec_code: frec for frec in sess.query(FREC)}

    # read FREC values from csv
    data = pd.read_csv(file_name, dtype=str)

    # clean data
    data = clean_data(
        data,
        FREC,
        {"frec": "frec_code", "cgac_agency_code": "cgac_code", "frec_entity_description": "agency_name",
         "agency_abbreviation": "agency_abbreviation"},
        {"frec": {"keep_null": False}, "cgac_code": {"pad_to_length": 3}, "frec_code": {"pad_to_length": 4}}
    )
    # de-dupe
    data.drop_duplicates(subset=['frec_code'], inplace=True)
    # create foreign key dicts
    cgac_dict = {str(cgac.cgac_code): cgac.cgac_id for
                 cgac in sess.query(CGAC).filter(CGAC.cgac_code.in_(data["cgac_code"])).all()}

    # insert to db
    delete_missing_frecs(models, data)
    update_frecs(models, data, cgac_dict)
    sess.add_all(models.values())
    sess.commit()

    logger.info('%s FREC records inserted', len(models))
    def setUpClass(cls):
        """Set up class-wide resources like submissions and jobs."""
        super(UserTests, cls).setUpClass()

        with create_app().app_context():
            sess = GlobalDB.db().session

            # Add submissions for agency user
            sess.query(Submission).filter(Submission.user_id == cls.agency_user_id).delete()
            sess.commit()
            for i in range(0, 6):
                sub = Submission(user_id=cls.agency_user_id)
                sub.reporting_start_date = datetime(2015, 10, 1)
                sub.reporting_end_date = datetime(2015, 12, 31)
                sub.cgac_code = cls.admin_cgac_code
                sess.add(sub)
                sess.commit()
                if i == 0:
                    cls.submission_id = sub.submission_id

            # Add job to first submission
            job = Job(
                submission_id=cls.submission_id,
                job_status_id=JOB_STATUS_DICT['running'],
                job_type_id=JOB_TYPE_DICT['file_upload'],
                file_type_id=FILE_TYPE_DICT['appropriations']
            )
            sess.add(job)
            sess.commit()
            cls.uploadId = job.job_id
def setup_emails():
    """Create email templates from model metadata."""
    with create_app().app_context():
        sess = GlobalDB.db().session

        # insert email template types
        type_list = [
            ('review_submission', '')
        ]
        for t in type_list:
            email_id = sess.query(
                EmailTemplateType.email_template_type_id).filter(
                EmailTemplateType.name == t[0]).one_or_none()
            if not email_id:
                email_type = EmailTemplateType(name=t[0], description=t[1])
                sess.add(email_type)

        sess.commit()

        # insert email templates

        # Submission Review
        template = ("[REV_USER_NAME] has shared a DATA Act broker submission with you from [REV_AGENCY]. Click "
                    "<a href='[REV_URL]'>here</a> to review their submission. For questions or comments, please visit "
                    "the Service Desk at https://servicedesk.usaspending.gov/ or e-mail [email protected].")
        load_email_template(sess, "DATA Act Broker - Submission Ready for Review", template, "review_submission")
def update_offices(csv_path):
    """ Load office data from the provided CSV and replace/insert any office lookups.

        Args:
            csv_path: path/url to the file to read from
    """
    sess = GlobalDB.db().session

    data = clean_office(csv_path)
    add_existing_id(data)

    old_data = data[data['existing_id'].notnull()]
    del old_data['existing_id']

    new_data = data[data['existing_id'].isnull()]
    del new_data['existing_id']

    # instead of using the pandas to_sql dataframe method like some of the other domain load processes, iterate through
    # the dataframe rows so we can load using the orm model (note: toyed with the SQLAlchemy bulk load options but
    # ultimately decided not to go outside the unit of work for the sake of a performance gain)
    for _, row in old_data.iterrows():
        sess.query(FPDSContractingOffice).filter_by(contracting_office_code=row['contracting_office_code'])\
            .update(row, synchronize_session=False)

    for _, row in new_data.iterrows():
        sess.add(FPDSContractingOffice(**row))

    sess.commit()
    logger.info('%s records in CSV, %s existing', len(data.index), sum(data['existing_id'].notnull()))
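# Hedged aside (not part of the original source): the bulk-load alternative mentioned in the comment
# above would look roughly like the sketch below; it trades the ORM unit of work for raw speed.
def _example_bulk_office_load(sess, new_data):
    """ Illustrative only: bulk insert new office rows outside the normal unit of work. """
    sess.bulk_save_objects([FPDSContractingOffice(**row) for _, row in new_data.iterrows()])
    sess.commit()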
def submission_procurements(submission_id):
    """Fetch procurements and subcontracts"""
    sess = GlobalDB.db().session

    logger.debug('Starting submission procurements')

    award_proc_sub = sess.query(AwardProcurement.piid, AwardProcurement.parent_award_id,
                                AwardProcurement.naics_description, AwardProcurement.awarding_sub_tier_agency_c,
                                AwardProcurement.submission_id).\
        filter(AwardProcurement.submission_id == submission_id).distinct().cte("award_proc_sub")

    results = sess.query(award_proc_sub, FSRSProcurement, FSRSSubcontract).\
        filter(FSRSProcurement.contract_number == award_proc_sub.c.piid).\
        filter(FSRSProcurement.idv_reference_number.isnot_distinct_from(award_proc_sub.c.parent_award_id)).\
        filter(FSRSProcurement.contracting_office_aid == award_proc_sub.c.awarding_sub_tier_agency_c).\
        filter(FSRSSubcontract.parent_id == FSRSProcurement.id)

    # The cte returns a set of columns, not an AwardProcurement object, so we have to unpack each column
    for award_piid, award_parent_id, award_naics_desc, award_sub_tier, award_sub_id, proc, sub in results:
        # need to combine those columns again here so we can get a proper ModelRow
        award = AwardProcurement(piid=award_piid, parent_award_id=award_parent_id, naics_description=award_naics_desc,
                                 awarding_sub_tier_agency_c=award_sub_tier, submission_id=award_sub_id)
        yield ModelRow(award, proc, sub, naics_desc=award.naics_description)

    logger.debug('Finished submission procurements')
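# Hedged usage sketch (not part of the original source): the generator yields one ModelRow per
# procurement/subcontract pairing, so callers can stream results without materializing the full set.
def _example_count_subcontract_rows(submission_id):
    """ Illustrative only: count the rows the generator yields for one submission. """
    return sum(1 for _ in submission_procurements(submission_id))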
def main():
    sess = GlobalDB.db().session
    start = datetime.datetime.now()
    logger.info("FPDS IDV delete started")

    # get and read the file
    del_file = get_delete_file()
    data = pd.read_csv(del_file, dtype=str, encoding='utf_8_sig')

    # Clean up the data so it's usable
    data = clean_delete_data(data)

    # Gather list of records to delete
    gather_start = datetime.datetime.now()
    logger.info("Starting gathering of records to delete.")
    delete_list, delete_dict = get_deletes(sess, data)
    gather_end = datetime.datetime.now()
    logger.info("Finished gathering records in {} seconds. Total records to delete: {}".format(
        (gather_end - gather_start).total_seconds(), len(delete_list)))
    # Delete records
    logger.info("Deleting records")
    delete_records(sess, delete_list, delete_dict)
    sess.commit()

    end = datetime.datetime.now()
    logger.info("FPDS IDV delete finished in %s seconds", (end - start).total_seconds())
def insert_from_table(file_type_id, submission_id):
    """ Insert the data from the base staging table into the corresponding Certified table.

        Args:
            file_type_id:  Database file type ID for files A, B, or C
            submission_id: Database ID for the submission being loaded
    """
    sess = GlobalDB.db().session
    logger.info('Copying submission {} data from base table into {}'.format(submission_id,
                                                                            FTI_TABLENAME_DICT[file_type_id]))

    table_type = FTI_BASETABLE_DICT[file_type_id].__table__.name
    column_list = [col.key for col in FTI_BASETABLE_DICT[file_type_id].__table__.columns]
    column_list.remove('created_at')
    column_list.remove('updated_at')
    column_list.remove(table_type + '_id')
    col_string = ", ".join(column_list)

    executed = sess.execute(
        "INSERT INTO certified_{} (created_at, updated_at, {}) "
        "SELECT NOW() AS created_at, NOW() AS updated_at, {} "
        "FROM {} "
        "WHERE submission_id={}".format(table_type, col_string, col_string, table_type, submission_id))
    sess.commit()

    logger.info('Loaded {} records into the {} table'.format(executed.rowcount, FTI_TABLENAME_DICT[file_type_id]))
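# Hedged illustration (not part of the original source): for file A the statement rendered by
# insert_from_table would look roughly like the following (column list abbreviated, table name assumed):
#
#   INSERT INTO certified_appropriation (created_at, updated_at, submission_id, ...)
#   SELECT NOW() AS created_at, NOW() AS updated_at, submission_id, ...
#   FROM appropriation
#   WHERE submission_id=1234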
def list_all_agencies():
    sess = GlobalDB.db().session
    cgacs = sess.query(CGAC).all()
    agency_list = [{'agency_name': cgac.agency_name, 'cgac_code': cgac.cgac_code} for cgac in cgacs]
    frecs = sess.query(FREC).all()
    shared_list = [{'agency_name': frec.agency_name, 'frec_code': frec.frec_code} for frec in frecs]
    return JsonResponse.create(StatusCode.OK, {'agency_list': agency_list, 'shared_agency_list': shared_list})
def start_a_generation(job, start_date, end_date, agency_code):
    """ Validates the start and end dates of the generation and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_code: Agency code for A file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type}
    logger.info(log_data)

    # Set SQS message attributes
    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}}

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
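# Hedged sketch (not part of the original source): the validator side would pull this work off the
# same queue roughly as below; the attribute name follows the message built above.
def _example_poll_generation_queue():
    """ Illustrative only: receive one generation job id plus its agency_code attribute. """
    queue = sqs_queue()
    for message in queue.receive_messages(MessageAttributeNames=['agency_code'], MaxNumberOfMessages=1):
        job_id = int(message.body)
        agency_code = message.message_attributes['agency_code']['StringValue']
        logger.info({'message': 'Received generation job {} for agency {}'.format(job_id, agency_code),
                     'message_type': 'BrokerInfo', 'job_id': job_id})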
def write_file_error(job_id, filename, error_type, extra_info=None):
    """ Write a file-level error to the file table

    Args:
        job_id: ID of job in job tracker
        filename: name of error report in S3
        error_type: type of error, value will be mapped to ValidationError class
        extra_info: list of extra information to be included in file
    """
    sess = GlobalDB.db().session
    try:
        int(job_id)
    except (TypeError, ValueError):
        raise ValueError("".join(["Bad jobId: ", str(job_id)]))

    # Get File object for this job ID or create it if it doesn't exist
    file_rec = create_file_if_needed(job_id, filename)

    # Mark error type and add header info if present
    file_rec.file_status_id = FILE_STATUS_DICT[ValidationError.get_error_type_string(error_type)]
    if extra_info is not None:
        if "missing_headers" in extra_info:
            file_rec.headers_missing = extra_info["missing_headers"]
        if "duplicated_headers" in extra_info:
            file_rec.headers_duplicated = extra_info["duplicated_headers"]

    sess.add(file_rec)
    sess.commit()
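# Hedged usage sketch (not part of the original source): record a file-level header error for a job
# whose uploaded CSV had missing and duplicated headers. ValidationError.headerError is assumed to
# be the relevant error-type constant; the filename is made up for illustration.
def _example_flag_header_error(job_id):
    """ Illustrative only: mark the file-level error and attach header details. """
    write_file_error(job_id, 'submission_1234_appropriations_error_report.csv',
                     ValidationError.headerError,
                     extra_info={'missing_headers': 'availability_type_code',
                                 'duplicated_headers': 'agency_identifier'})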
def main():
    parser = argparse.ArgumentParser(description='Update contract transaction and/or fabs rows based on updates to the '
                                                 'agency list')
    parser.add_argument('-a', '--missing_agency', help='Perform an update on 999 agency codes', action='store_true',
                        required=False, default=False)
    parser.add_argument('-s', '--subtier_code', help='Select specific subtier to update. Must be a 4-digit code',
                        type=str, required=False)
    parser.add_argument('-t', '--tables', help='Which tables (fabs, fpds, or both) to update. Defaults to both.',
                        required=False, default='both', choices=['fabs', 'fpds', 'both'])
    args = parser.parse_args()

    if not args.subtier_code and not args.missing_agency:
        logger.error('Missing either subtier_code or missing_agency argument')
    elif args.subtier_code and len(args.subtier_code) != 4:
        logger.error('Subtier not a correct format, must be 4 digits')
    else:
        sess = GlobalDB.db().session

        if args.tables in ('fpds', 'both'):
            update_table(sess, 'awarding', 'detached_award_procurement', args)
            update_table(sess, 'funding', 'detached_award_procurement', args)
            logger.info("Procurement Update Complete")

        if args.tables in ('fabs', 'both'):
            update_table(sess, 'awarding', 'published_award_financial_assistance', args)
            update_table(sess, 'funding', 'published_award_financial_assistance', args)
            logger.info("Award Financial Assistance Update Complete")
    def delete_submission(submission):
        """ Deletes all data associated with the specified submission
        NOTE: THERE IS NO WAY TO UNDO THIS """

        if submission.publish_status_id != PUBLISH_STATUS_DICT['unpublished']:
            return JsonResponse.error(ValueError("Submissions that have been certified cannot be deleted"),
                                      StatusCode.CLIENT_ERROR)

        sess = GlobalDB.db().session

        # Check if the submission has any jobs that are currently running, if so, do not allow deletion
        jobs = sess.query(Job).filter(Job.submission_id == submission.submission_id,
                                      Job.job_status_id == JOB_STATUS_DICT['running']).all()

        if jobs:
            return JsonResponse.error(ValueError("Submissions with running jobs cannot be deleted"),
                                      StatusCode.CLIENT_ERROR)

        sess.query(SubmissionSubTierAffiliation).filter(
            SubmissionSubTierAffiliation.submission_id == submission.submission_id).delete(
                synchronize_session=False)
        sess.query(Submission).filter(Submission.submission_id == submission.submission_id).delete(
            synchronize_session=False)
        sess.expire_all()

        return JsonResponse.create(StatusCode.OK, {"message": "Success"})
def load_cgac(file_name):
    """ Load CGAC (high-level agency names) lookup table.

        Args:
            file_name: path/url to the file to be read
    """
    sess = GlobalDB.db().session
    models = {cgac.cgac_code: cgac for cgac in sess.query(CGAC)}

    # read CGAC values from csv
    data = pd.read_csv(file_name, dtype=str)
    # clean data
    data = clean_data(
        data,
        CGAC,
        {"cgac_agency_code": "cgac_code", "agency_name": "agency_name",
         "agency_abbreviation": "agency_abbreviation"},
        {"cgac_code": {"pad_to_length": 3}}
    )
    # de-dupe
    data.drop_duplicates(subset=['cgac_code'], inplace=True)

    delete_missing_cgacs(models, data)
    update_cgacs(models, data)
    sess.add_all(models.values())
    sess.commit()

    logger.info('%s CGAC records inserted', len(models))
    def submit_files():
        file_manager = FileHandler(request, is_local=is_local, server_path=server_path)

        sess = GlobalDB.db().session

        start_date = request.json.get('reporting_period_start_date')
        end_date = request.json.get('reporting_period_end_date')
        is_quarter = request.json.get('is_quarter_format', False)

        if not (start_date is None or end_date is None):
            formatted_start_date, formatted_end_date = FileHandler.check_submission_dates(start_date,
                                                                                          end_date, is_quarter)

            submissions = sess.query(Submission).filter(
                Submission.cgac_code == request.json.get('cgac_code'),
                Submission.frec_code == request.json.get('frec_code'),
                Submission.reporting_start_date == formatted_start_date,
                Submission.reporting_end_date == formatted_end_date,
                Submission.is_quarter_format == is_quarter,
                Submission.publish_status_id != PUBLISH_STATUS_DICT['unpublished'])

            if 'existing_submission_id' in request.json:
                submissions.filter(Submission.submission_id !=
                                   request.json['existing_submission_id'])

            submissions = submissions.order_by(desc(Submission.created_at))

            if submissions.count() > 0:
                data = {
                        "message": "A submission with the same period already exists.",
                        "submissionId": submissions[0].submission_id
                }
                return JsonResponse.create(StatusCode.CLIENT_ERROR, data)

        return file_manager.submit(create_credentials)
def main():
    sess = GlobalDB.db().session

    parser = argparse.ArgumentParser(description='Update business categories for existing transaction data')
    parser.add_argument('-fpds', help='Update only FPDS business categories', action='store_true')
    parser.add_argument('-fabs', help='Update only FABS business categories', action='store_true')
    args = parser.parse_args()

    overall_start = datetime.now()
    logger.info("Starting business categories updates")

    start = datetime.now()
    logger.info("Recreating business category SQL functions")
    sess.execute(BUSINESS_CATEGORY_FUNCTION_SQL)
    sess.commit()
    logger.info("Finished recreating business category SQL functions in %s seconds"
                % (datetime.now() - start).total_seconds())

    if not (args.fpds or args.fabs):
        update_fpds_business_categories(sess)
        update_fabs_business_categories(sess)
    elif args.fpds:
        update_fpds_business_categories(sess)
    elif args.fabs:
        update_fabs_business_categories(sess)

    logger.info("Completed business categories updates in %s seconds"
                % (datetime.now() - overall_start).total_seconds())
def main():
    """ Loads DUNS from the DUNS export file (comprised of DUNS pre-2014) """
    parser = argparse.ArgumentParser(description='Adding historical DUNS to Broker.')
    parser.add_argument('-size', '--block_size', help='Number of rows to batch load', type=int,
                        default=10000)
    args = parser.parse_args()

    sess = GlobalDB.db().session
    client = sam_config_is_valid()

    logger.info('Retrieving historical DUNS file')
    start = datetime.now()
    if CONFIG_BROKER["use_aws"]:
        s3_client = boto3.client('s3', region_name=CONFIG_BROKER['aws_region'])
        duns_file = s3_client.generate_presigned_url('get_object', {'Bucket': CONFIG_BROKER['archive_bucket'],
                                                                    'Key': "DUNS_export_deduped.csv"}, ExpiresIn=10000)
    else:
        duns_file = os.path.join(CONFIG_BROKER["broker_files"], "DUNS_export_deduped.csv")

    if not duns_file:
        raise OSError("No DUNS_export_deduped.csv found.")

    logger.info("Retrieved historical DUNS file in {} s".format((datetime.now()-start).total_seconds()))

    try:
        run_duns_batches(duns_file, sess, client, args.block_size)
    except Exception as e:
        logger.exception(e)
        sess.rollback()

    logger.info("Updating historical DUNS complete")
    sess.close()
def get_submission_data(submission, file_type=''):
    """ Get data for the submission specified

        Args:
            submission: submission to retrieve metadata for
            file_type: the type of job to retrieve metadata for

        Returns:
            JsonResponse containing the error information or the object containing metadata for all relevant file types
    """
    sess = GlobalDB.db().session
    file_type = file_type.lower()

    # Make sure the file type provided is valid
    if file_type and file_type not in FILE_TYPE_DICT and file_type != 'cross':
        return JsonResponse.error(ValueError(file_type + ' is not a valid file type'), StatusCode.CLIENT_ERROR)

    # Make sure the file type provided is valid for the submission type
    is_fabs = submission.d2_submission
    if file_type and (is_fabs and file_type != 'fabs') or (not is_fabs and file_type == 'fabs'):
        return JsonResponse.error(ValueError(file_type + ' is not a valid file type for this submission'),
                                  StatusCode.CLIENT_ERROR)

    job_query = sess.query(Job).filter(Job.submission_id == submission.submission_id)
    if not file_type:
        relevant_job_types = (JOB_TYPE_DICT['csv_record_validation'], JOB_TYPE_DICT['validation'])
        job_query = job_query.filter(Job.job_type_id.in_(relevant_job_types))
    elif file_type == 'cross':
        job_query = job_query.filter(Job.job_type_id == JOB_TYPE_DICT['validation'])
    else:
        job_query = job_query.filter(Job.file_type_id == FILE_TYPE_DICT[file_type])

    job_dict = {'jobs': [job_to_dict(job) for job in job_query]}
    return JsonResponse.create(StatusCode.OK, job_dict)
def get_fabs_meta(submission_id):
    """Return the total rows, valid rows, publish date, and publish file for FABS submissions"""
    sess = GlobalDB.db().session

    # get row counts from the DetachedAwardFinancialAssistance table
    dafa = DetachedAwardFinancialAssistance
    total_rows = sess.query(dafa).filter(dafa.submission_id == submission_id)
    valid_rows = total_rows.filter(dafa.is_valid)

    # retrieve the published data and file
    submission = sess.query(Submission).filter(Submission.submission_id == submission_id).one()
    publish_date, published_file = None, None
    certify_data = get_lastest_certified_date(submission, is_fabs=True)

    try:
        iter(certify_data)
    except TypeError:
        publish_date = certify_data
    else:
        publish_date, file_path = certify_data
        if CONFIG_BROKER["use_aws"] and file_path:
            path, file_name = file_path.rsplit('/', 1)  # split by last instance of /
            published_file = S3Handler().get_signed_url(path=path, file_name=file_name,
                                                        bucket_route=CONFIG_BROKER['certified_bucket'],
                                                        url_mapping=CONFIG_BROKER["certified_bucket_mapping"],
                                                        method="get_object")
        elif file_path:
            published_file = file_path

    return {
        'valid_rows': valid_rows.count(),
        'total_rows': total_rows.count(),
        'publish_date': publish_date.strftime('%-I:%M%p %m/%d/%Y') if publish_date else None,
        'published_file': published_file
    }
def before_request():
    # Set global value for local
    g.is_local = local
    sess = GlobalDB.db().session
    # setup user
    g.user = None
    if session.get('name') is not None:
        g.user = sess.query(User).filter_by(user_id=session['name']).one_or_none()
def update_tas_ids(model_class, submission_id):
    sess = GlobalDB.db().session
    submission = sess.query(Submission).filter_by(submission_id=submission_id).one()

    subquery = matching_cars_subquery(sess, model_class, submission.reporting_start_date, submission.reporting_end_date)
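    # The update below uses the CARS-matching subquery as a correlated scalar subquery: each row of
    # model_class in this submission gets its tas_id looked up against the submission's reporting
    # period, and rows with no matching CARS record are left with a NULL tas_id.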
    sess.query(model_class).filter_by(submission_id=submission_id).\
        update({getattr(model_class, 'tas_id'): subquery}, synchronize_session=False)
    sess.commit()
def test_w_perms_broker_user(create_session_mock, max_dict_mock, database, monkeypatch):
    ah = max_login_func(create_session_mock, max_dict_mock, monkeypatch, MAX_RESPONSE_W_PERMS)
    res = ah.max_login({})
    sess = GlobalDB.db().session
    # This is to prevent an integrity error with other tests that create users.
    sess.query(User).filter(func.lower(User.email) == func.lower("*****@*****.**")).delete(
        synchronize_session=False)
    sess.commit()
    assert res is True
def get_window():
    sess = GlobalDB.db().session

    curr_date = datetime.now().date()

    return sess.query(SubmissionWindow).filter(
                                            SubmissionWindow.start_date <= curr_date,
                                            SubmissionWindow.end_date >= curr_date)
    def check_year_and_quarter(cgac_code, frec_code, reporting_fiscal_year, reporting_fiscal_period):
        """ Check if cgac (or frec) code, year, and quarter already has a published submission """
        if not cgac_code and not frec_code:
            return JsonResponse.error(ValueError("CGAC or FR Entity Code required"), StatusCode.CLIENT_ERROR)

        sess = GlobalDB.db().session
        return find_existing_submissions_in_period(sess, cgac_code, frec_code, reporting_fiscal_year,
                                                   reporting_fiscal_period)
def load_tas(backfill_historic=False):
    """ Load TAS file into broker database.

        Args:
            backfill_historic: if set to true, this will only update certain columns if budget_function_code is null
    """
    # read TAS file to dataframe, to make sure all is well with the file before firing up a db transaction
    tas_files = []

    if CONFIG_BROKER["use_aws"]:
        # Storing version dictionaries in the list to prevent getting all the links at once and possibly work with
        # expired AWS links
        s3connection = boto3.client('s3', region_name=CONFIG_BROKER['aws_region'])
        # list_object_versions returns the versions in reverse chronological order

        if not backfill_historic:
            # get the latest tas_file
            tas_files = [s3connection.generate_presigned_url(ClientMethod='get_object',
                                                             Params={'Bucket': CONFIG_BROKER['sf_133_bucket'],
                                                                     'Key': 'cars_tas.csv'}, ExpiresIn=600)]
        else:
            tas_files = s3connection.list_object_versions(Bucket=CONFIG_BROKER['sf_133_bucket'],
                                                          Prefix='cars_tas.csv')['Versions']
            # getting the latest file (see the reversed) from each day for performance and accuracy
            tas_files_grouped = {tas_file['LastModified'].date(): tas_file for tas_file in reversed(tas_files)}
            # sorting them back to chronological order
            tas_files = sorted([tas_file for date, tas_file in tas_files_grouped.items()],
                               key=lambda k: k['LastModified'])
    elif backfill_historic:
        logger.error('Unable to attain historical versions of cars_tas without aws access.')
        return
    else:
        tas_file = os.path.join(CONFIG_BROKER["path"], "dataactvalidator", "config", "cars_tas.csv")
        tas_files.append(tas_file)

    sess = GlobalDB.db().session
    for tas_file in reversed(tas_files):
        update_missing = missing_records(sess) if backfill_historic else []
        if backfill_historic and not update_missing:
            # no more missing, we're done here
            break
        if CONFIG_BROKER["use_aws"] and backfill_historic:
            # generate url from the version dict
            logger.info('Working with remote cars_tas.csv (from {})'.format(tas_file['LastModified']))
            tas_file = s3connection.generate_presigned_url(
                ClientMethod='get_object',
                Params={
                    'Bucket': CONFIG_BROKER['sf_133_bucket'],
                    'Key': 'cars_tas.csv',
                    'VersionId': tas_file['VersionId']
                },
                ExpiresIn=600
            )
        elif CONFIG_BROKER["use_aws"]:
            logger.info('Working with latest remote cars_tas.csv')
        else:
            logger.info('Working with local cars_tas.csv')
        update_tas_lookups(sess, tas_file, update_missing=update_missing)
    def delete(self):
        sess = GlobalDB.db().session
        sess.delete(self.sqs)
        sess.commit()
    def setUpClass(cls):
        """ Set up class-wide resources (test data) """
        super(DetachedUploadTests, cls).setUpClass()
        # TODO: refactor into a pytest fixture

        with create_app().app_context():
            # get the submission test users
            sess = GlobalDB.db().session
            cls.session = sess
            admin_user = sess.query(User).filter(
                User.email == cls.test_users['admin_user']).one()
            agency_user = sess.query(User).filter(
                User.email == cls.test_users['agency_user']).one()
            editfabs_user = sess.query(User).filter(
                User.email == cls.test_users['editfabs_user']).one()
            cls.admin_user_id = admin_user.user_id
            cls.agency_user_id = agency_user.user_id
            cls.agency_user_email = agency_user.email
            cls.editfabs_email = editfabs_user.email

            # setup submission/jobs data for test_check_status
            cls.d2_submission = cls.insert_submission(sess,
                                                      cls.admin_user_id,
                                                      cgac_code="SYS",
                                                      start_date="10/2015",
                                                      end_date="12/2015",
                                                      is_quarter=True)

            cls.d2_submission_2 = cls.insert_submission(sess,
                                                        cls.agency_user_id,
                                                        cgac_code="SYS",
                                                        start_date="10/2015",
                                                        end_date="12/2015",
                                                        is_quarter=True)

            cls.published_submission = cls.insert_submission(
                sess,
                cls.admin_user_id,
                cgac_code="SYS",
                start_date="10/2015",
                end_date="12/2015",
                is_quarter=True,
                publish_status_id=PUBLISH_STATUS_DICT["published"])

            cls.other_submission = cls.insert_submission(sess,
                                                         cls.admin_user_id,
                                                         cgac_code="SYS",
                                                         start_date="07/2015",
                                                         end_date="09/2015",
                                                         is_quarter=True,
                                                         d2_submission=False)

            cls.test_agency_user_submission_id = cls.insert_submission(
                sess,
                cls.agency_user_id,
                cgac_code="NOT",
                start_date="10/2015",
                end_date="12/2015",
                is_quarter=True,
                d2_submission=True)
            cls.insert_agency_user_submission_data(
                sess, cls.test_agency_user_submission_id)
def add_jobs_for_uploaded_file(upload_file, submission_id,
                               existing_submission):
    """ Add upload and validation jobs for a single filetype

    Arguments:
        upload_file: UploadFile named tuple
        submission_id: submission ID to attach to jobs
        existing_submission: true if we should update existing jobs rather than creating new ones

    Returns:
        the validation job id for this file type (if any)
        the upload job id for this file type
    """
    sess = GlobalDB.db().session

    file_type_id = FILE_TYPE_DICT[upload_file.file_type]
    validation_job_id = None

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.

    if existing_submission:
        # mark existing upload job as running
        upload_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['file_upload']).one()
        # mark as running and set new file name and path
        upload_job.job_status_id = JOB_STATUS_DICT['running']
        upload_job.original_filename = upload_file.file_name
        upload_job.filename = upload_file.upload_name

    else:
        if upload_file.file_type in ["award", "award_procurement"]:
            # file generation handled on backend, mark as ready
            upload_status = JOB_STATUS_DICT['ready']
        elif upload_file.file_type in ["executive_compensation", "sub_award"]:
            # these depend on other validation jobs (D1 for E, C for F), so they wait
            upload_status = JOB_STATUS_DICT['waiting']
        else:
            # mark as running since frontend should be doing this upload
            upload_status = JOB_STATUS_DICT['running']

        upload_job = Job(original_filename=upload_file.file_name,
                         filename=upload_file.upload_name,
                         file_type_id=file_type_id,
                         job_status_id=upload_status,
                         job_type_id=JOB_TYPE_DICT['file_upload'],
                         submission_id=submission_id)
        sess.add(upload_job)
        sess.flush()

    if existing_submission:
        # if the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error metadata) that
        # might exist from a previous run.
        val_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']).one()
        val_job.job_status_id = JOB_STATUS_DICT['waiting']
        val_job.original_filename = upload_file.file_name
        val_job.filename = upload_file.upload_name
        # reset file size and number of rows to be set during validation of new file
        val_job.file_size = None
        val_job.number_of_rows = None
        # delete error metadata this might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == val_job.job_id).\
            delete(synchronize_session='fetch')
        # delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == val_job.job_id).delete(
            synchronize_session='fetch')

    else:
        # create a new record validation job and add dependencies if necessary
        if upload_file.file_type == "executive_compensation":
            d1_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if d1_val_job is None:
                logger.error({
                    'message': "Cannot create E job without a D1 job",
                    'message_type': 'CoreError',
                    'submission_id': submission_id,
                    'file_type': 'E'
                })
                raise Exception("Cannot create E job without a D1 job")
            # Add dependency on D1 validation job
            d1_dependency = JobDependency(job_id=upload_job.job_id,
                                          prerequisite_id=d1_val_job.job_id)
            sess.add(d1_dependency)

        elif upload_file.file_type == "sub_award":
            # check for the C validation job this F job depends on
            c_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if c_val_job is None:
                logger.error({
                    'message': "Cannot create F job without a C job",
                    'message_type': 'CoreError',
                    'submission_id': submission_id,
                    'file_type': 'F'
                })
                raise Exception("Cannot create F job without a C job")
            # add dependency on C validation job
            c_dependency = JobDependency(job_id=upload_job.job_id,
                                         prerequisite_id=c_val_job.job_id)
            sess.add(c_dependency)

        else:
            # E and F don't get validation jobs
            val_job = Job(original_filename=upload_file.file_name,
                          filename=upload_file.upload_name,
                          file_type_id=file_type_id,
                          job_status_id=JOB_STATUS_DICT['waiting'],
                          job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                          submission_id=submission_id)
            sess.add(val_job)
            sess.flush()
            # add dependency between file upload job and file validation job
            upload_dependency = JobDependency(
                job_id=val_job.job_id, prerequisite_id=upload_job.job_id)
            sess.add(upload_dependency)
            validation_job_id = val_job.job_id

    sess.commit()

    return validation_job_id, upload_job.job_id
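# Hedged usage sketch (not part of the original source): wiring up jobs for a brand-new submission's
# appropriations upload. `UploadFile` is the named tuple referenced in the docstring; its field
# names here are an assumption for illustration.
def _example_add_appropriations_jobs(submission_id):
    """ Illustrative only: create upload + validation jobs for a new (non-existing) submission. """
    upload_file = UploadFile(file_type='appropriations', upload_name='1234/appropriations.csv',
                             file_name='appropriations.csv')
    val_job_id, upload_job_id = add_jobs_for_uploaded_file(upload_file, submission_id,
                                                           existing_submission=False)
    return val_job_id, upload_job_id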
    def setUpClass(cls):
        """Set up resources to be shared within a test class"""
        cls.session_id = ""

        with create_validator_app().app_context():
            # update application's db config options so unittests
            # run against test databases
            suite = cls.__name__.lower()
            config = dataactcore.config.CONFIG_DB
            cls.num = randint(1, 9999)
            config['db_name'] = 'unittest{}_{}_data_broker'.format(
                cls.num, suite)
            dataactcore.config.CONFIG_DB = config
            create_database(CONFIG_DB['db_name'])
            run_migrations()

            # drop and re-create test user db/tables
            setup_user_db()
            # drop and re-create test job db/tables
            setup_job_tracker_db()
            # drop and re-create test error db/tables
            setup_error_db()
            # drop and re-create test validation db/tables
            setup_validation_db()
            # load e-mail templates
            setup_emails()

            # set up default e-mails for tests
            test_users = {
                'admin_user': '******',
                'agency_user': '******',
                'agency_user_2': '*****@*****.**',
                'no_permissions_user': '******',
                'editfabs_user': '******'
            }
            user_password = '******'
            admin_password = '******'

            # get user info and save as class variables for use by tests
            sess = GlobalDB.db().session
            admin_cgac = CGAC(cgac_code='SYS', agency_name='Admin Agency')
            cls.admin_cgac_code = admin_cgac.cgac_code
            sess.add(admin_cgac)
            sess.commit()

            cgac = CGAC(cgac_code='000', agency_name='Example Agency')
            sess.add(cgac)
            sess.commit()

            frec = FREC(frec_code='0001',
                        cgac_id=cgac.cgac_id,
                        agency_name='Example FREC')
            sess.add(frec)
            sess.commit()
            sub_tier = SubTierAgency(cgac_id=cgac.cgac_id,
                                     frec_id=frec.frec_id,
                                     sub_tier_agency_code='0000',
                                     sub_tier_agency_name='Example Sub Tier')
            sess.add(sub_tier)

            # set up users for status tests
            def add_user(email,
                         name,
                         username,
                         permission_type=ALL_PERMISSION_TYPES_DICT['writer'],
                         website_admin=False):
                user = UserFactory(email=email,
                                   website_admin=website_admin,
                                   name=name,
                                   username=username,
                                   affiliations=[
                                       UserAffiliation(
                                           cgac=cgac,
                                           permission_type_id=permission_type)
                                   ])
                user.salt, user.password_hash = get_password_hash(
                    user_password, Bcrypt())
                sess.add(user)

            add_user(test_users['agency_user'], "Test User", "testUser")
            add_user(test_users['agency_user_2'], "Test User 2", "testUser2")
            add_user(test_users['editfabs_user'],
                     "Fabs Writer",
                     "fabsWriter",
                     permission_type=ALL_PERMISSION_TYPES_DICT['editfabs'])

            # add new users
            create_user_with_password(test_users["admin_user"],
                                      admin_password,
                                      Bcrypt(),
                                      website_admin=True)
            create_user_with_password(test_users["no_permissions_user"],
                                      user_password, Bcrypt())

            agency_user = sess.query(User).filter(
                User.email == test_users['agency_user']).one()
            cls.agency_user_id = agency_user.user_id

            sess.commit()

        # set up info needed by the individual test classes
        cls.test_users = test_users
        cls.user_password = user_password
        cls.admin_password = admin_password
        cls.local = CONFIG_BROKER['local']
def generate_file(submission, file_type, start, end, agency_type, file_format):
    """ Start a file generation job for the specified file type within a submission

        Args:
            submission: submission for which we're generating the file
            file_type: type of file to generate the job for
            start: the start date for the file to generate
            end: the end date for the file to generate
            agency_type: The type of agency (awarding or funding) to generate the file for (only used for D file
                generation)
            file_format: determines if the file generated is a txt or a csv (only used for D file generation)

        Returns:
            Results of check_generation or JsonResponse object containing an error if the prerequisite job isn't
            complete.
    """
    error_message = None
    # submission is a FABS submission
    if submission.d2_submission:
        error_message = 'Cannot generate files for FABS submissions.'

    elif file_type in ['D1', 'D2']:
        # D file generation requires start and end date
        if not start or not end:
            error_message = 'Must have a start and end date for D file generation.'
        # D files can only be generated by awarding or funding agency
        elif agency_type not in ['awarding', 'funding']:
            error_message = 'agency_type must be either awarding or funding for D file generation.'
        elif file_format not in ['csv', 'txt']:
            error_message = 'file_format must be either csv or txt for D file generation.'

    # Only D1, D2, E, and F files can be generated
    elif file_type not in ['E', 'F']:
        error_message = 'File type must be either D1, D2, E, or F'

    # Return any client errors
    if error_message:
        return JsonResponse.error(ValueError(error_message),
                                  StatusCode.CLIENT_ERROR)

    sess = GlobalDB.db().session
    job = sess.query(Job).filter(
        Job.submission_id == submission.submission_id,
        Job.file_type_id == lookups.FILE_TYPE_DICT_LETTER_ID[file_type],
        Job.job_type_id == lookups.JOB_TYPE_DICT['file_upload']).one()
    logger.info({
        'message': 'Starting {} file generation within submission {}'.format(file_type,
                                                                             submission.submission_id),
        'message_type': 'BrokerInfo',
        'submission_id': submission.submission_id,
        'job_id': job.job_id,
        'file_type': file_type
    })

    # Check prerequisites on upload job
    if not generation_helper.check_generation_prereqs(submission.submission_id,
                                                      file_type):
        return JsonResponse.error(
            ResponseException(
                'Must wait for completion of prerequisite validation job',
                StatusCode.CLIENT_ERROR), StatusCode.CLIENT_ERROR)
    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(job,
                                                 start,
                                                 end,
                                                 agency_type,
                                                 file_format=file_format)
        else:
            generation_helper.start_e_f_generation(job)
    except Exception as e:
        mark_job_status(job.job_id, 'failed')
        job.error_message = str(e)
        sess.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_generation(submission, file_type)
    def loadSql(cls, filename):
        """Load SQL-based validation rules to db."""
        with createApp().app_context():
            sess = GlobalDB.db().session

            # Delete all records currently in table
            sess.query(RuleSql).delete()

            filename = os.path.join(cls.sql_rules_path, filename)

            # open csv
            with open(filename, 'r') as csvfile:
                # read header
                header = csvfile.readline()
                # split header into field names
                rawFieldNames = header.split(',')
                fieldNames = []
                # clean field names
                for field in rawFieldNames:
                    fieldNames.append(FieldCleaner.cleanString(field))

                unknownFields = set(fieldNames) - set(cls.headers)
                if len(unknownFields) != 0:
                    raise KeyError("".join([
                        "Found unexpected fields: ",
                        str(list(unknownFields))
                    ]))

                missingFields = set(cls.headers) - set(fieldNames)
                if len(missingFields) != 0:
                    raise ValueError("".join([
                        "Missing required fields: ",
                        str(list(missingFields))
                    ]))

                reader = csv.DictReader(csvfile, fieldnames=fieldNames)
                for row in reader:
                    sql = cls.readSqlStr(row['query_name'])

                    rule_sql = RuleSql(
                        rule_sql=sql,
                        rule_label=row['rule_label'],
                        rule_description=row['rule_description'],
                        rule_error_message=row['rule_error_message'],
                        query_name=row['query_name'])

                    # look up file type id
                    try:
                        fileId = FILE_TYPE_DICT[row["file_type"]]
                    except Exception as e:
                        raise Exception(
                            "{}: file type={}, rule label={}. Rule not loaded."
                            .format(e, row["file_type"], row["rule_label"]))
                    try:
                        if row["target_file"].strip() == "":
                            # No target file provided
                            targetFileId = None
                        else:
                            targetFileId = FILE_TYPE_DICT[row["target_file"]]
                    except Exception as e:
                        raise Exception(
                            "{}: file type={}, rule label={}. Rule not loaded."
                            .format(e, row["target_file"], row["rule_label"]))

                    # set cross file flag
                    if (FieldCleaner.cleanString(row["rule_cross_file_flag"])
                            in ['true', 't', 'y', 'yes']):
                        cross_file_flag = True
                    else:
                        cross_file_flag = False

                    rule_sql.rule_severity_id = RULE_SEVERITY_DICT[
                        row['severity_name']]
                    rule_sql.file_id = fileId
                    rule_sql.target_file_id = targetFileId
                    rule_sql.rule_cross_file_flag = cross_file_flag

                    sess.merge(rule_sql)
            sess.commit()
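# Standalone sketch (not part of the loader above): the rule_cross_file_flag parsing in
# loadSql treats any of 'true', 't', 'y', 'yes' as affirmative once the raw CSV value has
# been cleaned. FieldCleaner.cleanString is assumed to strip and lowercase the value, so a
# plain str.strip().lower() stands in for it here.
def parse_cross_file_flag(raw_value):
    """Return True when a cleaned CSV flag value reads as affirmative, mirroring loadSql."""
    return raw_value.strip().lower() in ['true', 't', 'y', 'yes']

assert parse_cross_file_flag('TRUE ') is True
assert parse_cross_file_flag('no') is False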
def load_cfda_program(base_path,
                      load_local=False,
                      local_file_name="cfda_program.csv"):
    """ Load cfda program.

        Args:
            base_path: directory that contains the cfda values files.
            load_local: boolean indicating whether to load from a local file or not
            local_file_name: the name of the file if loading locally
    """
    local_now = datetime.now()
    if not load_local:
        logger.info("Fetching CFDA file from {}".format(S3_CFDA_FILE))
        tmp_name = str(time.time()).replace(".", "") + "_cfda_program.csv"
        filename = os.path.join(base_path, tmp_name)
        r = requests.get(S3_CFDA_FILE, allow_redirects=True)
        with open(filename, 'wb') as cfda_file:
            cfda_file.write(r.content)
    else:
        filename = os.path.join(base_path, local_file_name)
    logger.info('Loading CFDA program file: ' + filename)
    model = CFDAProgram

    metrics_json = {
        'script_name': 'load_cfda_data.py',
        'start_time': str(local_now),
        'new_records': 0
    }

    def fix_program_number(row, decimals=3):
        multiplier = 10**decimals
        value = math.floor(row['program_number'] * multiplier +
                           0.5) / multiplier
        return str(value).ljust(6, '0')

    with create_app().app_context():
        configure_logging()
        sess = GlobalDB.db().session

        import_data = pd.read_csv(filename,
                                  dtype=str,
                                  encoding='cp1252',
                                  na_filter=False)
        import_data = clean_data(import_data, model, DATA_CLEANING_MAP, {})
        import_data["published_date"] = format_date(
            import_data["published_date"])
        import_data["archived_date"] = format_date(
            import_data["archived_date"])
        table_name = model.__table__.name
        # Check if there is new data to load
        new_data = check_dataframe_diff(import_data,
                                        model, ['cfda_program_id'],
                                        ['program_number'],
                                        lambda_funcs=[('program_number',
                                                       fix_program_number)])
        if new_data:
            # insert to db
            sess.query(model).delete()
            num = insert_dataframe(import_data, table_name, sess.connection())
            sess.commit()
    if not load_local:
        os.remove(filename)
    if new_data:
        logger.info('{} records inserted to {}'.format(num, table_name))
        metrics_json['new_records'] = num
    else:
        logger.info("Skipped cfda load, no new data.")
        sys.exit(3)

    metrics_json['duration'] = str(datetime.now() - local_now)

    with open('load_cfda_data_metrics.json', 'w+') as metrics_file:
        json.dump(metrics_json, metrics_file)
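# Standalone sketch (not part of the loader above): fix_program_number rounds the CFDA
# program number to three decimals and right-pads the result to six characters, so 10.1
# becomes '10.100'. The replica below assumes the value is already a float.
import math

def fix_program_number_example(program_number, decimals=3):
    multiplier = 10 ** decimals
    value = math.floor(program_number * multiplier + 0.5) / multiplier
    return str(value).ljust(6, '0')

assert fix_program_number_example(10.1) == '10.100'
assert fix_program_number_example(93.001) == '93.001'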
    method.add_argument('-m',
                        '--min_id',
                        type=int,
                        nargs=1,
                        help="Load all data from a minimum id (0 for complete"
                        " backfill)")
    method.add_argument(
        '-i',
        '--ids',
        type=int,
        nargs='+',
        help="Single or list of FSRS ids to populate the subaward table")

    with create_app().app_context():
        logger.info("Begin backfilling Subaward table")
        sess = GlobalDB.db().session
        args = parser.parse_args()

        metrics_json = {
            'script_name': 'populate_subaward_table.py',
            'records_inserted': 0,
            'start_time': str(now)
        }

        service_types = []
        if not (args.procurements or args.grants):
            logger.error(
                'FSRS types not provided. Please specify procurements, grants, or both.'
            )
            sys.exit(1)
        if args.procurements:
    def setUpClass(cls):
        """Set up resources to be shared within a test class"""
        # TODO: refactor into a pytest class fixtures and inject as necessary
        # update application's db config options so unittests
        # run against test databases
        configure_logging()
        suite = cls.__name__.lower()
        config = dataactcore.config.CONFIG_DB
        cls.num = randint(1, 9999)
        config['db_name'] = 'unittest{}_{}_data_broker'.format(cls.num, suite)
        dataactcore.config.CONFIG_DB = config
        create_database(CONFIG_DB['db_name'])
        run_migrations()

        app = create_app()
        app.config['TESTING'] = True
        app.config['DEBUG'] = False
        cls.app = TestApp(app)
        sess = GlobalDB.db().session

        # set up default e-mails for tests
        test_users = {
            'admin_user': '******',
            'agency_user': '******',
            'agency_user_2': '*****@*****.**',
            'no_permissions_user': '******',
            'editfabs_user': '******'
        }
        admin_password = '******'

        cgac = CGAC(cgac_code='000', agency_name='Example Agency')
        sess.add(cgac)
        sess.commit()

        # Allow us to augment default test failure msg w/ more detail
        cls.longMessage = True
        # Upload files to S3 (False = skip re-uploading on subsequent runs)
        cls.uploadFiles = True
        # Run tests for local broker or not
        cls.local = CONFIG_BROKER['local']
        # This needs to be set to the local directory for error reports if local is True
        cls.local_file_directory = CONFIG_SERVICES['error_report_path']

        # drop and re-create test job db/tables
        setup_job_tracker_db()
        # drop and re-create test error db/tables
        setup_error_db()
        # drop and re-create test validation db
        setup_validation_db()

        # setup Schema

        SchemaLoader.load_all_from_path(validator_config_path)
        load_sql_rules()

        create_user_with_password(test_users["admin_user"],
                                  admin_password,
                                  Bcrypt(),
                                  website_admin=True)
        cls.userId = None
        cls.test_users = test_users
        # constants to use for default submission start and end dates
        cls.SUBMISSION_START_DEFAULT = datetime(2015, 10, 1)
        cls.SUBMISSION_END_DEFAULT = datetime(2015, 10, 31)
def load_sub_tier_agencies(file_name, force_reload=False):
    """ Load Sub Tier Agency (sub_tier-level agency names) lookup table.

        Args:
            file_name: path/url to the file to be read
            force_reload: whether to reload regardless

        Returns:
            True if new data was loaded, False if the load was skipped
    """
    sess = GlobalDB.db().session
    models = {sub_tier_agency.sub_tier_agency_code: sub_tier_agency for
              sub_tier_agency in sess.query(SubTierAgency)}

    # read Sub Tier Agency values from csv
    data = pd.read_csv(file_name, dtype=str)

    condition = data['TOPTIER_FLAG'] == 'TRUE'
    data.loc[condition, 'PRIORITY'] = 1
    data.loc[~condition, 'PRIORITY'] = 2
    data.replace({'TRUE': True, 'FALSE': False}, inplace=True)

    # clean data
    data = clean_data(
        data,
        SubTierAgency,
        {'cgac_agency_code': 'cgac_code', 'subtier_code': 'sub_tier_agency_code', 'priority': 'priority',
         'frec': 'frec_code', 'subtier_name': 'sub_tier_agency_name', 'is_frec': 'is_frec'},
        {'cgac_code': {'pad_to_length': 3}, 'frec_code': {'pad_to_length': 4},
         'sub_tier_agency_code': {'pad_to_length': 4}}
    )
    # de-dupe
    data.drop_duplicates(subset=['sub_tier_agency_code'], inplace=True)
    # create foreign key dicts
    cgac_dict = {str(cgac.cgac_code): cgac.cgac_id for
                 cgac in sess.query(CGAC).filter(CGAC.cgac_code.in_(data['cgac_code'])).all()}
    cgac_dict_flipped = {cgac_id: cgac_code for cgac_code, cgac_id in cgac_dict.items()}
    frec_dict = {str(frec.frec_code): frec.frec_id for
                 frec in sess.query(FREC).filter(FREC.frec_code.in_(data['frec_code'])).all()}
    frec_dict_flipped = {frec_id: frec_code for frec_code, frec_id in frec_dict.items()}

    # compare to existing content in table
    def int_to_float(row):
        return float(row['priority'])

    def extract_cgac(row):
        return cgac_dict_flipped[row['cgac_id']] if row['cgac_id'] in cgac_dict_flipped else np.nan

    def extract_frec(row):
        return frec_dict_flipped[row['frec_id']] if row['frec_id'] in frec_dict_flipped else np.nan

    diff_found = check_dataframe_diff(data, SubTierAgency, ['sub_tier_agency_id', 'cgac_id', 'frec_id'],
                                      ['sub_tier_agency_code'],
                                      lambda_funcs=[('priority', int_to_float), ('cgac_code', extract_cgac),
                                                    ('frec_code', extract_frec)])
    if force_reload or diff_found:
        delete_missing_sub_tier_agencies(models, data)
        update_sub_tier_agencies(models, data, cgac_dict, frec_dict)
        sess.add_all(models.values())
        sess.commit()

        logger.info('%s Sub Tier Agency records inserted', len(models))
        return True
    else:
        logger.info('No differences found, skipping subtier table reload.')
        return False
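# Standalone sketch (not part of the loader above): the TOPTIER_FLAG handling in
# load_sub_tier_agencies marks top-tier rows with priority 1 and everything else with
# priority 2 before the TRUE/FALSE strings become booleans. The column names below are
# placeholders for illustration only.
import pandas as pd

demo = pd.DataFrame({'TOPTIER_FLAG': ['TRUE', 'FALSE'], 'SUBTIER_CODE': ['0000', '0001']})
toptier = demo['TOPTIER_FLAG'] == 'TRUE'
demo.loc[toptier, 'PRIORITY'] = 1
demo.loc[~toptier, 'PRIORITY'] = 2
demo.replace({'TRUE': True, 'FALSE': False}, inplace=True)
# demo['PRIORITY'].tolist() == [1.0, 2.0]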
def perms_to_affiliations(perms, user_id, service_account_flag=False):
    """ Convert a list of perms from MAX to a list of UserAffiliations. Filter out and log any malformed perms

        Args:
            perms: list of permissions (as strings) for the user
            user_id: the ID of the user
            service_account_flag: flag to indicate a service account
        Yields:
            UserAffiliations based on the permissions provided
    """
    available_cgacs = {cgac.cgac_code: cgac for cgac in GlobalDB.db().session.query(CGAC)}
    available_frecs = {frec.frec_code: frec for frec in GlobalDB.db().session.query(FREC)}
    log_data = {
        'message_type': 'BrokerWarning',
        'user_id': user_id
    }
    for perm in perms:
        log_data['message'] = 'User with ID {} has malformed permission: {}'.format(user_id, perm)
        components = perm.split('-PERM_')
        if len(components) != 2:
            logger.warning(log_data)
            continue

        codes, perm_level = components
        split_codes = codes.split('-FREC_')
        frec_code, cgac_code = None, None
        if len(split_codes) == 2:
            # permissions for FR entity code and readonly CGAC
            frec_code, cgac_code = split_codes[1], split_codes[0]
            if frec_code not in available_frecs or cgac_code not in available_cgacs:
                logger.warning(log_data)
                continue
        else:
            # permissions for CGAC
            cgac_code = codes
            if cgac_code not in available_cgacs:
                logger.warning(log_data)
                continue

        perm_level = perm_level.lower()

        if service_account_flag:
            # Replace MAX Service Account permissions with Broker "write" and "editfabs" permissions
            perm_level = 'we'
        elif perm_level not in 'rwsef':
            logger.warning(log_data)
            continue

        for permission in perm_level:
            if frec_code:
                yield UserAffiliation(
                    cgac=available_cgacs[cgac_code],
                    frec=None,
                    permission_type_id=PERMISSION_SHORT_DICT['r']
                )
                yield UserAffiliation(
                    cgac=None,
                    frec=available_frecs[frec_code],
                    permission_type_id=PERMISSION_SHORT_DICT[permission]
                )
            else:
                yield UserAffiliation(
                    cgac=available_cgacs[cgac_code] if cgac_code else None,
                    frec=None,
                    permission_type_id=PERMISSION_SHORT_DICT[permission]
                )
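# Standalone sketch (not part of the function above): MAX permission strings parsed by
# perms_to_affiliations follow the pattern '<CGAC>[-FREC_<FREC>]-PERM_<levels>'. The helper
# below mirrors only the string-splitting portion; the CGAC/FREC database lookups are omitted.
def split_max_perm(perm):
    components = perm.split('-PERM_')
    if len(components) != 2:
        return None  # malformed permission
    codes, perm_level = components
    split_codes = codes.split('-FREC_')
    if len(split_codes) == 2:
        cgac_code, frec_code = split_codes[0], split_codes[1]
    else:
        cgac_code, frec_code = codes, None
    return cgac_code, frec_code, perm_level.lower()

assert split_max_perm('012-PERM_R') == ('012', None, 'r')
assert split_max_perm('012-FREC_4567-PERM_W') == ('012', '4567', 'w')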
    def max_login(self, session):
        """ Logs a user in if their password matches using MAX

            Args:
                session: Session object from flask

            Returns:
                A JsonResponse containing the user information or details on which error occurred, such as whether a
                type was wrong, something wasn't implemented, invalid keys were provided, login was denied, or a
                different, unexpected error occurred.
        """
        try:
            safe_dictionary = RequestDictionary(self.request)

            ticket = safe_dictionary.get_value("ticket")
            service = safe_dictionary.get_value('service')

            # Call MAX's serviceValidate endpoint and retrieve the response
            max_dict = get_max_dict(ticket, service)

            if 'cas:authenticationSuccess' not in max_dict['cas:serviceResponse']:
                raise ValueError("The Max CAS endpoint was unable to locate your session "
                                 "using the ticket/service combination you provided.")
            cas_attrs = max_dict['cas:serviceResponse']['cas:authenticationSuccess']['cas:attributes']

            # Grab MAX ID to see if a service account is being logged in
            max_id_components = cas_attrs['maxAttribute:MAX-ID'].split('_')
            service_account_flag = (len(max_id_components) > 1 and max_id_components[0].lower() == 's')

            # Grab the email and list of groups from MAX's response
            email = cas_attrs['maxAttribute:Email-Address']

            try:
                sess = GlobalDB.db().session
                user = sess.query(User).filter(func.lower(User.email) == func.lower(email)).one_or_none()

                # If the user does not exist, create them since they are allowed to access the site because they got
                # past the above group membership checks
                if user is None:
                    user = User()
                    user.email = email

                set_user_name(user, cas_attrs)

                set_max_perms(user, cas_attrs['maxAttribute:GroupList'], service_account_flag)

                sess.add(user)
                sess.commit()

            except MultipleResultsFound:
                raise ValueError("An error occurred during login.")

            return self.create_session_and_response(session, user)

        # Catch any specifically raised errors or any other errors that may have happened and return them cleanly.
        # We add the error parameter here because this endpoint needs to provide better feedback, and to avoid changing
        # the default behavior of the JsonResponse class globally.
        except (TypeError, KeyError, NotImplementedError) as e:
            # Return a 400 with appropriate message
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR, error=str(e))
        except ValueError as e:
            # Return a 401 for login denied
            return JsonResponse.error(e, StatusCode.LOGIN_REQUIRED, error=str(e))
        except Exception as e:
            # Return 500
            return JsonResponse.error(e, StatusCode.INTERNAL_ERROR, error=str(e))
def get_cgacs_without_sub_tier_agencies(sess=None):
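    """ Get CGAC agencies that have no associated Sub Tier Agencies.

        Args:
            sess: database session to use, defaults to the GlobalDB session when not provided

        Returns:
            list of CGAC objects with no SubTierAgency records
    """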
    if sess is None:
        sess = GlobalDB.db().session
    cgac_sub_tiers = sess.query(SubTierAgency).distinct(SubTierAgency.cgac_id)
    return sess.query(CGAC).filter(
        CGAC.cgac_id.notin_([st.cgac.cgac_id for st in cgac_sub_tiers])).all()
def load_program_activity_data(base_path):
    """ Load program activity lookup table.

        Args:
            base_path: directory of domain config files
    """
    last_upload = get_date_of_current_pa_upload(base_path)
    if not (last_upload > get_stored_pa_last_upload()):
        return

    program_activity_file = get_program_activity_file(base_path)

    logger.info('Loading program activity: ' + PA_FILE_NAME)

    with create_app().app_context():
        sess = GlobalDB.db().session
        try:
            data = pd.read_csv(program_activity_file, dtype=str)
        except pd.io.common.EmptyDataError:
            log_blank_file()
            # exit code chosen arbitrarily, to indicate distinct failure states
            sys.exit(4)

        headers = set([header.upper() for header in list(data)])

        if not VALID_HEADERS.issubset(headers):
            logger.error(
                "Missing required headers. Required headers include: %s" %
                str(VALID_HEADERS))
            sys.exit(4)

        try:
            dropped_count, data = clean_data(
                data, ProgramActivity, {
                    "fyq": "fiscal_year_quarter",
                    "agency_code": "agency_id",
                    "allocation_id": "allocation_transfer_id",
                    "account_code": "account_number",
                    "pa_code": "program_activity_code",
                    "pa_title": "program_activity_name"
                }, {
                    "program_activity_code": {
                        "pad_to_length": 4
                    },
                    "agency_id": {
                        "pad_to_length": 3
                    },
                    "allocation_transfer_id": {
                        "pad_to_length": 3,
                        "keep_null": True
                    },
                    "account_number": {
                        "pad_to_length": 4
                    }
                }, [
                    "agency_id", "program_activity_code", "account_number",
                    "program_activity_name"
                ], True)
        except FailureThresholdExceededException as e:
            if e.count == 0:
                log_blank_file()
                sys.exit(4)
            else:
                count_str = "Application tried to drop {} rows".format(e.count)
                logger.error(
                    "Loading of program activity file failed due to exceeded failure threshold. "
                    + count_str)
                sys.exit(5)

        sess.query(ProgramActivity).delete()

        # Lowercase Program Activity Name
        data['program_activity_name'] = data['program_activity_name'].apply(
            lambda x: lowercase_or_notify(x))

        # because we're only loading a subset of program activity info,
        # there will be duplicate records in the dataframe. This is ok,
        # but they need to be de-duped before the db load, and we log how many are dropped.
        base_count = data.shape[0]
        data.drop_duplicates(inplace=True)
        logger.info("Dropped {} duplicate rows.".format(base_count -
                                                        data.shape[0]))

        # insert to db
        table_name = ProgramActivity.__table__.name
        num = insert_dataframe(data, table_name, sess.connection())
        sess.commit()

    set_stored_pa_last_upload(last_upload)
    logger.info('{} records inserted to {}'.format(num, table_name))

    if dropped_count > 0:
        sys.exit(3)
def purge():
    sess = GlobalDB.db().session
    sess.query(SQS).delete()
    sess.commit()
def teardown_appcontext(exception):
    GlobalDB.close()
def receive_messages(WaitTimeSeconds, MessageAttributeNames=None):  # noqa
    sess = GlobalDB.db().session
    messages = []
    for sqs in sess.query(SQS):
        messages.append(SQSMockMessage(sqs))
    return messages
    def setUpClass(cls):
        """Set up class-wide resources."""
        super(MixedFileTests, cls).setUpClass()
        user = cls.userId
        force_tas_load = False

        with createApp().app_context():
            # get the submission test user
            sess = GlobalDB.db().session

            # Create test submissions and jobs, also uploading
            # the files needed for each job.
            statusReadyId = JOB_STATUS_DICT['ready']
            jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation']
            jobDict = {}

            # next three jobs belong to the same submission and are tests
            # for single-file validations that contain failing rows
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("appropMixed.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['mixed'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("programActivityMixed.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['programMixed'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("awardMixed.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardMixed'] = job_info.job_id

            # next job tests single-file validations for award_financial
            # (submission has a non-Q1 end date)
            submissionId = cls.insertSubmission(sess, user,
                                                datetime(2015, 3, 15))
            job_info = Job(filename=cls.uploadFile("awardFinancialMixed.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardFinMixed'] = job_info.job_id

            # job below tests a file that has a mixed-delimiter heading
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("awardMixedDelimiter.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardMixedDelimiter'] = job_info.job_id

            # next five jobs are cross-file and belong to the same submission
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("cross_file_A.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossApprop'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("cross_file_B.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossPgmAct'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("cross_file_C.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossAwardFin'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("cross_file_D2.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossAward'] = job_info.job_id

            job_info = Job(job_status_id=statusReadyId,
                           job_type_id=JOB_TYPE_DICT['validation'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossFile'] = job_info.job_id

            # next four jobs test short columns names and belong to the same submission
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("appropValidShortcols.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['appropValidShortcols'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile(
                "programActivityMixedShortcols.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['programMixedShortcols'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile(
                "awardFinancialMixedShortcols.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardFinMixedShortcols'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("awardValidShortcols.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardValidShortcols'] = job_info.job_id

            # commit submissions/jobs and output IDs
            sess.commit()
            for job_type, job_id in jobDict.items():
                print('{}: {}'.format(job_type, job_id))

            # Load fields and rules
            FileTypeTests.load_definitions(sess, force_tas_load,
                                           cls.RULES_TO_APPLY)

            cls.jobDict = jobDict
    def test_password_reset_email(self):
        """Test password reset email."""
        self.logout()
        email = self.test_users["password_reset_email"]
        postJson = {"email": email}
        response = self.app.post_json(
            "/v1/reset_password/",
            postJson,
            headers={"x-session-id": self.session_id})
        self.check_response(response, StatusCode.OK)

        # Test password reset for unapproved user and locked user
        with createApp().app_context():
            sess = GlobalDB.db().session
            user = sess.query(User).filter(User.email == email).one()
            user.user_status_id = self.userStatusDict['awaiting_approval']
            sess.commit()
            response = self.app.post_json(
                "/v1/reset_password/",
                postJson,
                headers={"x-session-id": self.session_id},
                expect_errors=True)
            self.check_response(response, StatusCode.CLIENT_ERROR)

            user.user_status_id = self.userStatusDict['approved']
            user.is_active = False
            sess.commit()
            response = self.app.post_json(
                "/v1/reset_password/",
                postJson,
                headers={"x-session-id": self.session_id},
                expect_errors=True)
            self.check_response(response, StatusCode.CLIENT_ERROR)

            # Test route to confirm tokens
            token = sesEmail.createToken(
                self.test_users["password_reset_email"], "password_reset")
            postJson = {"token": token}
            response = self.app.post_json(
                "/v1/confirm_password_token/",
                postJson,
                headers={"x-session-id": self.session_id})
            self.check_response(response, StatusCode.OK, "success")
            self.assertEqual(response.json["errorCode"], sesEmail.LINK_VALID)

            postJson = {"user_email": email, "password": self.user_password}
            response = self.app.post_json(
                "/v1/set_password/",
                postJson,
                headers={"x-session-id": self.session_id})
            self.check_response(response, StatusCode.OK,
                                "Password successfully changed")
            user = sess.query(User).filter(User.email == email).one()
            self.assertTrue(user.password_hash)

        # Call again, should error
        postJson = {"user_email": email, "password": self.user_password}
        response = self.app.post_json(
            "/v1/set_password/",
            postJson,
            headers={"x-session-id": self.session_id},
            expect_errors=True)
        self.check_response(response, StatusCode.LOGIN_REQUIRED)
    def run_test(self,
                 jobId,
                 statusId,
                 statusName,
                 fileSize,
                 stagingRows,
                 errorStatus,
                 numErrors,
                 numWarnings=0,
                 warningFileSize=None):
        """ Runs a validation test

        Args:
            jobId: ID of job for this validation
            statusId: Expected HTTP status code for this test
            statusName: Expected status in job tracker, False if job should not exist
            fileSize: Expected file size of error report, False if error report should not exist
            stagingRows: Expected number of rows in validation db staging tables. False if no rows are expected
            errorStatus: Expected status in file table of error DB, False if file object should not exist
            numErrors: Expected number of errors
            rowErrorsPresent: Checks flag for whether row errors occurred, None to skip the check

        Returns:

        """
        with createApp().app_context():
            sess = GlobalDB.db().session

            response = self.validateJob(jobId)
            self.assertEqual(response.status_code, statusId,
                             str(self.getResponseInfo(response)))

            # get the job from db
            job = sess.query(Job).filter(Job.job_id == jobId).one()
            if statusName is not False:
                self.assertEqual(job.job_status_id,
                                 JOB_STATUS_DICT[statusName])

            self.assertEqual(response.headers.get("Content-Type"),
                             "application/json")

            # Check valid row count for this job
            if stagingRows is not False:
                self.assertEqual(job.number_of_rows_valid, stagingRows)

            if errorStatus is not False:
                self.assertEqual(
                    sess.query(File).filter(
                        File.job_id == jobId).one().file_status_id,
                    FILE_STATUS_DICT[errorStatus])
                self.assertEqual(checkNumberOfErrorsByJobId(jobId, 'fatal'),
                                 numErrors)
                self.assertEqual(checkNumberOfErrorsByJobId(jobId, 'warning'),
                                 numWarnings)

            if fileSize is not False:
                reportPath = getReportPath(job, 'error')
                if self.local:
                    self.assertFileSizeAppxy(fileSize, reportPath)
                else:
                    self.assertGreater(
                        s3UrlHandler.getFileSize(
                            'errors/{}'.format(reportPath)), fileSize - 5)
                    self.assertLess(
                        s3UrlHandler.getFileSize(
                            'errors/{}'.format(reportPath)), fileSize + 5)

            if warningFileSize is not None and warningFileSize is not False:
                reportPath = getReportPath(job, 'warning')
                if self.local:
                    self.assertFileSizeAppxy(warningFileSize, reportPath)
                else:
                    self.assertGreater(
                        s3UrlHandler.getFileSize(
                            'errors/{}'.format(reportPath)),
                        warningFileSize - 5)
                    self.assertLess(
                        s3UrlHandler.getFileSize(
                            'errors/{}'.format(reportPath)),
                        warningFileSize + 5)

        return response
    def tearDownClass(cls):
        """Tear down class-level resources."""
        GlobalDB.close()
        drop_database(CONFIG_DB['db_name'])
def main():
    now = datetime.now()
    parser = argparse.ArgumentParser(
        description='Pull data from the Federal Hierarchy API.')
    parser.add_argument('-a',
                        '--all',
                        help='Clear out the database and get historical data',
                        action='store_true')
    parser.add_argument('-f',
                        '--filename',
                        help='Generate a local CSV file from the data.',
                        nargs=1,
                        type=str)
    parser.add_argument('-o',
                        '--export_office',
                        help='Export the current office table. '
                        'Please provide the file name/path.',
                        nargs=1,
                        type=str)
    parser.add_argument('-d',
                        '--pull_date',
                        help='Date from which to start the pull',
                        nargs=1,
                        type=str)
    parser.add_argument('-i',
                        '--ignore_db',
                        help='Do not update the DB tables',
                        action='store_true')
    args = parser.parse_args()

    if args.all and args.pull_date:
        logger.error("The -a and -d flags conflict, cannot use both at once.")
        sys.exit(1)

    metrics_json = {
        'script_name': 'load_federal_hierarchy.py',
        'start_time': str(now),
        'level_1_records': 0,
        'level_2_records': 0,
        'level_3_records': 0,
        'level_4_records': 0,
        'level_5_records': 0,
        'level_6_records': 0,
        'level_7_records': 0
    }

    # Handle the pull_date parameter
    updated_date_from = None
    if args.pull_date:
        try:
            updated_date_from = args.pull_date[0]
            datetime.strptime(updated_date_from, "%Y-%m-%d")
        except ValueError:
            logger.error("The date given to the -d flag was not parseable.")
            sys.exit(1)

    # Get or create the start date
    sess = GlobalDB.db().session
    if not args.all and not updated_date_from:
        last_pull_date = sess.query(func.max(Office.updated_at)).one_or_none()
        if not last_pull_date or not last_pull_date[0]:
            logger.error(
                "The -a or -d flag must be set when there are no Offices present in the database."
            )
            sys.exit(1)
        updated_date_from = last_pull_date[0].date()

    # Handle the filename parameter
    filename = args.filename[0] if args.filename else None

    # Handle the export office parameter
    export_office = args.export_office[0] if args.export_office else None

    # Handle a complete data reload
    if args.all and not args.ignore_db:
        logger.info("Emptying out the Office table for a complete reload.")
        sess.execute('''TRUNCATE TABLE office RESTART IDENTITY''')

    try:
        pull_offices(sess, filename, not args.ignore_db, args.all,
                     updated_date_from, export_office, metrics_json)
    except Exception as e:
        logger.error(str(e))
        sys.exit(1)

    metrics_json['duration'] = str(datetime.now() - now)

    with open('load_federal_hierarchy_metrics.json', 'w+') as metrics_file:
        json.dump(metrics_json, metrics_file)
    logger.info("Script complete")
    def test_cross_file(self):
        crossId = self.jobDict["crossFile"]
        # Run jobs for A, B, C, and D2, then cross file validation job
        # Note: test files used for cross validation use the short column names
        # as a way to ensure those are handled correctly by the validator
        awardFinResponse = self.validateJob(self.jobDict["crossAwardFin"])
        self.assertEqual(awardFinResponse.status_code,
                         200,
                         msg=str(awardFinResponse.json))
        awardResponse = self.validateJob(self.jobDict["crossAward"])
        self.assertEqual(awardResponse.status_code,
                         200,
                         msg=str(awardResponse.json))
        appropResponse = self.validateJob(self.jobDict["crossApprop"])
        self.assertEqual(appropResponse.status_code,
                         200,
                         msg=str(appropResponse.json))
        pgmActResponse = self.validateJob(self.jobDict["crossPgmAct"])
        self.assertEqual(pgmActResponse.status_code,
                         200,
                         msg=str(pgmActResponse.json))
        crossFileResponse = self.validateJob(crossId)
        self.assertEqual(crossFileResponse.status_code,
                         200,
                         msg=str(crossFileResponse.json))

        with createApp().app_context():
            sess = GlobalDB.db().session

            job = sess.query(Job).filter(Job.job_id == crossId).one()

            # Check number of cross file validation errors in DB for this job
            self.assertEqual(checkNumberOfErrorsByJobId(crossId, "fatal"), 0)
            self.assertEqual(checkNumberOfErrorsByJobId(crossId, "warning"), 3)
            self.assertEqual(job.job_status_id, JOB_STATUS_DICT['finished'])

            # Check that cross file validation report exists and is the right size
            submissionId = job.submission_id
            sizePathPairs = [
                (89,
                 get_cross_report_name(submissionId, "appropriations",
                                       "program_activity")),
                (89,
                 get_cross_report_name(submissionId, "award_financial",
                                       "award")),
                (2363,
                 get_cross_warning_report_name(submissionId, "appropriations",
                                               "program_activity")),
                (89,
                 get_cross_warning_report_name(submissionId, "award_financial",
                                               "award")),
            ]

        for size, path in sizePathPairs:
            if self.local:
                self.assertFileSizeAppxy(size, path)
            else:
                self.assertGreater(s3UrlHandler.getFileSize("errors/" + path),
                                   size - 5)
                self.assertLess(s3UrlHandler.getFileSize("errors/" + path),
                                size + 5)
def clear_sessions():
    """ Removes old sessions that are expired """
    new_time = to_unix_time(datetime.utcnow())
    sess = GlobalDB.db().session
    sess.query(SessionMap).filter(SessionMap.expiration < new_time).delete()
    sess.commit()
def setup_user_db():
    """Create user tables from model metadata."""
    with create_app().app_context():
        sess = GlobalDB.db().session
        insert_codes(sess)
        sess.commit()
def setup_validation_db():
    """Create validation tables from model metadata and do initial inserts."""
    with create_app().app_context():
        sess = GlobalDB.db().session
        insert_codes(sess)
        sess.commit()
def validate_file_by_sql(job, file_type, short_to_long_dict):
    """ Check all SQL rules

    Args:
        job: the Job which is running
        file_type: file type being checked
        short_to_long_dict: mapping of short to long schema column names

    Returns:
        List of ValidationFailures
    """

    sql_val_start = datetime.now()
    log_string = 'on submission_id: {}, job_id: {}, file_type: {}'.format(
        str(job.submission_id), str(job.job_id), job.file_type.name)
    logger.info({
        'message': 'Beginning SQL validations {}'.format(log_string),
        'message_type': 'ValidatorInfo',
        'submission_id': job.submission_id,
        'job_id': job.job_id,
        'file_type': job.file_type.name,
        'action': 'run_sql_validations',
        'status': 'start',
        'start_time': sql_val_start
    })
    sess = GlobalDB.db().session

    # Pull all SQL rules for this file type
    file_id = FILE_TYPE_DICT[file_type]
    rules = sess.query(RuleSql).filter_by(file_id=file_id,
                                          rule_cross_file_flag=False)
    errors = []

    # For each rule, execute sql for rule
    for rule in rules:
        rule_start = datetime.now()
        logger.info({
            'message':
            'Beginning SQL validation rule {} {}'.format(
                rule.query_name, log_string),
            'message_type':
            'ValidatorInfo',
            'submission_id':
            job.submission_id,
            'job_id':
            job.job_id,
            'rule':
            rule.query_name,
            'file_type':
            job.file_type.name,
            'action':
            'run_sql_validation_rule',
            'status':
            'start',
            'start_time':
            rule_start
        })

        failures = sess.execute(rule.rule_sql.format(job.submission_id))
        if failures.rowcount:
            # Create column list (exclude row_number)
            cols = failures.keys()
            cols.remove("row_number")
            col_headers = [
                short_to_long_dict.get(field, field) for field in cols
            ]

            # materialize as we'll iterate over the failures twice
            failures = list(failures)
            flex_data = relevant_flex_data(failures, job.job_id)

            errors.extend(
                failure_row_to_tuple(rule, flex_data, cols, col_headers,
                                     file_id, failure) for failure in failures)

        rule_duration = (datetime.now() - rule_start).total_seconds()
        logger.info({
            'message':
            'Completed SQL validation rule {} {}'.format(
                rule.query_name, log_string),
            'message_type':
            'ValidatorInfo',
            'submission_id':
            job.submission_id,
            'job_id':
            job.job_id,
            'rule':
            rule.query_name,
            'file_type':
            job.file_type.name,
            'action':
            'run_sql_validation_rule',
            'status':
            'finish',
            'start_time':
            rule_start,
            'end_time':
            datetime.now(),
            'duration':
            rule_duration
        })

    sql_val_duration = (datetime.now() - sql_val_start).total_seconds()
    logger.info({
        'message': 'Completed SQL validations {}'.format(log_string),
        'message_type': 'ValidatorInfo',
        'submission_id': job.submission_id,
        'job_id': job.job_id,
        'file_type': job.file_type.name,
        'action': 'run_sql_validations',
        'status': 'finish',
        'start_time': sql_val_start,
        'end_time': datetime.now(),
        'duration': sql_val_duration
    })
    return errors
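# Standalone sketch (not part of the validator above): each rule_sql entry is assumed to
# carry a str.format placeholder for the submission id, so executing a rule amounts to
# formatting its template and running the result. The query text below is invented for
# illustration and is not an actual Broker rule.
example_rule_sql = (
    "SELECT row_number, approp.tas "
    "FROM appropriation AS approp "
    "WHERE approp.submission_id = {}"
)
formatted_sql = example_rule_sql.format(1234)
# formatted_sql now ends with "submission_id = 1234" and could be passed to sess.execute()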
def cross_validate_sql(rules, submission_id, short_to_long_dict, first_file,
                       second_file, job, error_csv, warning_csv, error_list,
                       job_id):
    """ Evaluate all sql-based rules for cross file validation

    Args:
        rules -- List of Rule objects
        submission_id -- ID of submission to run cross-file validation
    """
    conn = GlobalDB.db().connection

    # Run each rule's SQL, recording all failures in the error/warning reports
    for rule in rules:
        rule_start = datetime.now()
        logger.info({
            'message':
            'Beginning cross-file rule {} on submission_id: {}'.format(
                rule.query_name, str(submission_id)),
            'message_type':
            'ValidatorInfo',
            'rule':
            rule.query_name,
            'job_id':
            job.job_id,
            'submission_id':
            submission_id,
            'action':
            'run_cross_validation_rule',
            'status':
            'start',
            'start':
            rule_start
        })
        failed_rows = conn.execute(rule.rule_sql.format(submission_id))
        logger.info({
            'message':
            'Finished running cross-file rule {} on submission_id: {}.'.format(
                rule.query_name, str(submission_id)) +
            'Starting flex field gathering and file writing',
            'message_type':
            'ValidatorInfo',
            'rule':
            rule.query_name,
            'job_id':
            job.job_id,
            'submission_id':
            submission_id
        })
        if failed_rows.rowcount:
            # get list of fields involved in this validation
            # note: row_number is metadata, not a field being
            # validated, so exclude it
            cols = failed_rows.keys()
            cols.remove('row_number')
            column_string = ", ".join(
                short_to_long_dict[c] if c in short_to_long_dict else c
                for c in cols)

            # materialize as we'll iterate over the failed_rows twice
            failed_rows = list(failed_rows)
            num_failed_rows = len(failed_rows)
            slice_start = 0
            slice_size = 10000
            while slice_start <= num_failed_rows:
                # finding out row numbers for logger
                last_error_curr_slice = slice_start + slice_size
                failed_row_subset = failed_rows[
                    slice_start:last_error_curr_slice]
                if last_error_curr_slice > num_failed_rows:
                    last_error_curr_slice = num_failed_rows
                logger.info({
                    'message':
                    'Starting flex field gathering for cross-file rule ' +
                    '{} on submission_id: {} for '.format(
                        rule.query_name, str(submission_id)) +
                    'failure rows: {}-{}'.format(str(slice_start),
                                                 str(last_error_curr_slice)),
                    'message_type':
                    'ValidatorInfo',
                    'rule':
                    rule.query_name,
                    'job_id':
                    job.job_id,
                    'submission_id':
                    submission_id
                })
                flex_data = relevant_cross_flex_data(failed_row_subset,
                                                     submission_id,
                                                     [first_file, second_file])
                logger.info({
                    'message':
                    'Finished flex field gathering for cross-file rule ' +
                    '{} on submission_id: {} for '.format(
                        rule.query_name, str(submission_id)) +
                    'failure rows: {}-{}'.format(str(slice_start),
                                                 str(last_error_curr_slice)),
                    'message_type':
                    'ValidatorInfo',
                    'rule':
                    rule.query_name,
                    'job_id':
                    job.job_id,
                    'submission_id':
                    submission_id
                })

                for row in failed_row_subset:
                    # get list of values for each column
                    values = [
                        "{}: {}".format(short_to_long_dict[c], str(row[c]))
                        if c in short_to_long_dict else "{}: {}".format(
                            c, str(row[c])) for c in cols
                    ]
                    values = ", ".join(values)
                    full_column_string = column_string
                    # go through all flex fields in this row and add to the columns and values
                    for field in flex_data[row['row_number']]:
                        full_column_string += ", " + field.header + "_file" +\
                                              FILE_TYPE_DICT_LETTER[field.file_type_id].lower()
                        values += ", {}: {}".format(
                            field.header + "_file" +
                            FILE_TYPE_DICT_LETTER[field.file_type_id].lower(),
                            field.cell)

                    target_file_type = FILE_TYPE_DICT_ID[rule.target_file_id]

                    failure = [
                        rule.file.name, target_file_type, full_column_string,
                        str(rule.rule_error_message), values,
                        row['row_number'],
                        str(rule.rule_label), rule.file_id,
                        rule.target_file_id, rule.rule_severity_id
                    ]
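                    # failure indices 0-6 are the report columns; 7-9 carry the file,
                    # target file, and severity ids used below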
                    if failure[9] == RULE_SEVERITY_DICT['fatal']:
                        error_csv.writerow(failure[0:7])
                    if failure[9] == RULE_SEVERITY_DICT['warning']:
                        warning_csv.writerow(failure[0:7])
                    error_list.record_row_error(job_id,
                                                "cross_file",
                                                failure[0],
                                                failure[3],
                                                failure[5],
                                                failure[6],
                                                failure[7],
                                                failure[8],
                                                severity_id=failure[9])
                slice_start = slice_start + slice_size

        rule_duration = (datetime.now() - rule_start).total_seconds()
        logger.info({
            'message':
            'Completed cross-file rule {} on submission_id: {}'.format(
                rule.query_name, str(submission_id)),
            'message_type':
            'ValidatorInfo',
            'rule':
            rule.query_name,
            'job_id':
            job.job_id,
            'submission_id':
            submission_id,
            'action':
            'run_cross_validation_rule',
            'status':
            'finish',
            'start':
            rule_start,
            'duration':
            rule_duration
        })
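# Standalone sketch (not part of the function above): failed rows are processed in slices
# of 10,000 so the flex-field lookups stay bounded. The same windowing on a plain list
# shows the slice boundaries the logger reports.
failed_rows_demo = list(range(25000))
slice_start, slice_size = 0, 10000
windows = []
while slice_start <= len(failed_rows_demo):
    subset = failed_rows_demo[slice_start:slice_start + slice_size]
    windows.append((slice_start, slice_start + len(subset)))
    slice_start += slice_size
# windows == [(0, 10000), (10000, 20000), (20000, 25000)]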
def before_request():
    GlobalDB.db()
def generate_detached_file(file_type, cgac_code, frec_code, start_date,
                           end_date, year, period, agency_type, file_format):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start_date: start date in a string, formatted MM/DD/YYYY
            end_date: end date in a string, formatted MM/DD/YYYY
            year: year to generate for, integer 4 digits
            period: period to generate for, integer (2-12)
            agency_type: The type of agency (awarding or funding) to generate the file for
            file_format: determines if the file generated is a txt or a csv (only used for D file generation)

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start_date, and end_date.

        Raises:
            ResponseException: if the start_date and end_date Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(
            ValueError(
                "Detached file generation requires CGAC or FR Entity Code"),
            StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start_date or not end_date:
            return JsonResponse.error(
                ValueError(
                    'Must have a start and end date for D file generation.'),
                StatusCode.CLIENT_ERROR)

        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start_date)
                and StringCleaner.is_date(end_date)):
            raise ResponseException(
                'Start or end date cannot be parsed into a date',
                StatusCode.CLIENT_ERROR)

        if agency_type not in ['awarding', 'funding']:
            return JsonResponse.error(
                ValueError('agency_type must be either awarding or funding.'),
                StatusCode.CLIENT_ERROR)

        if file_format not in ['csv', 'txt']:
            return JsonResponse.error(
                ValueError('file_format must be either csv or txt.'),
                StatusCode.CLIENT_ERROR)
    else:
        # Make sure both year and period are provided
        if not (year and period):
            return JsonResponse.error(
                ValueError(
                    "Must have a year and period for A file generation."),
                StatusCode.CLIENT_ERROR)

        try:
            # Convert to real start and end dates
            start_date, end_date = generic_helper.year_period_to_dates(
                year, period)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.create_generation_job(file_type_name,
                                                      start_date, end_date)

    agency_code = frec_code if frec_code else cgac_code
    logger.info({
        'message':
        'Starting detached {} file generation'.format(file_type),
        'message_type':
        'BrokerInfo',
        'job_id':
        new_job.job_id,
        'file_type':
        file_type,
        'agency_code':
        agency_code,
        'start_date':
        start_date,
        'end_date':
        end_date
    })

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job,
                                                 start_date,
                                                 end_date,
                                                 agency_type,
                                                 agency_code=agency_code,
                                                 file_format=file_format)
        else:
            generation_helper.start_a_generation(new_job, start_date, end_date,
                                                 agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)