Example #1
    def handle(self, *args, **options):
        logger.info('Starting updating awarding agencies...')

        fiscal_year = options.get('fiscal_year')[0]

        page = options.get('page')
        limit = options.get('limit')

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if options.get('contracts', None):
            with timer('D1 (contracts/FPDS) awarding/funding agencies updates',
                       logger.info):
                self.update_awarding_funding_agency(fiscal_year,
                                                    'D1',
                                                    page=page,
                                                    limit=limit)

        elif options.get('assistance', None):
            with timer(
                    'D2 (assistance/FABS) awarding/funding agencies updates',
                    logger.info):
                self.update_awarding_funding_agency(fiscal_year,
                                                    'D2',
                                                    page=page,
                                                    limit=limit)

        else:
            logger.error('Not a valid data type: --assistance, --contracts')

        logger.info('Finished')
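
Note: the option handling above (options.get('fiscal_year')[0], page[0] if page else 1) implies the command declares its arguments with nargs, so each value arrives wrapped in a list. A hypothetical add_arguments consistent with this handler (argument names and types are inferred from how the options are read, not taken from the project):

    def add_arguments(self, parser):
        # Positional fiscal year; nargs=1 makes options['fiscal_year'] a one-element list
        parser.add_argument('fiscal_year', nargs=1, type=int)
        # Optional paging controls, also read as one-element lists in handle()
        parser.add_argument('--page', nargs=1, type=int)
        parser.add_argument('--limit', nargs=1, type=int)
        # Flags selecting the D1 (contracts) or D2 (assistance) pipeline
        parser.add_argument('--contracts', action='store_true')
        parser.add_argument('--assistance', action='store_true')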
Example #2
    def handle(self, *args, **options):
        logger.info('Starting FABS nightly data load...')

        # Use date provided or pull most recent ExternalDataLoadDate
        if options.get('date'):
            date = options.get('date')[0]
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FABS starting from %s' % date)

        # Retrieve FABS data
        with timer('retrieving/diff-ing FABS Data', logger.info):
            to_insert, ids_to_delete = self.get_fabs_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            # Create a file with the deletion IDs and place in a bucket for ElasticSearch
            self.send_deletes_to_elasticsearch(ids_to_delete)

            # Delete FABS records by ID
            with timer('deleting stale FABS data', logger.info):
                self.delete_stale_fabs(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            # Add FABS records
            with timer('inserting new FABS data', logger.info):
                self.insert_new_fabs(to_insert=to_insert, total_rows=total_rows)

            # Update Awards based on changed FABS records
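            # NOTE: award_update_id_list is assumed to be a module-level list
            # populated by insert_new_fabs above (it is not defined in this excerpt)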
            with timer('updating awards to reflect their latest associated transaction info', logger.info):
                update_awards(tuple(award_update_id_list))

            # Update AwardCategories based on changed FABS records
            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

        logger.info('FABS NIGHTLY UPDATE FINISHED!')
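
Note: the delete-then-save bookkeeping at the end of this handler can be expressed as a single idempotent upsert; a sketch assuming the same model and lookup dict:

        ExternalDataLoadDate.objects.update_or_create(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs'],
            defaults={'last_load_date': start_date},
        )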
Example #3
def test_timer_times(capsys):
    'Verify that timer shows longer times for slower operations'

    pattern = re.compile(r'([\d\.e\-]+) sec')

    with timer():
        print('Doing a thing')
    output0 = capsys.readouterr()[0]
    time0 = float(pattern.search(output0).group(1))

    with timer():
        print('Doing a slower thing')
        time.sleep(0.1)
    output1 = capsys.readouterr()[0]
    time1 = float(pattern.search(output1).group(1))

    assert time1 > time0
Example #4
def test_timer(capsys):
    'Verify that timer helper executes without error'

    with timer():
        print('Doing a thing')
    output = capsys.readouterr()[0]
    assert 'Beginning' in output
    assert 'finished' in output
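
The two tests above pin down the timer contract: it emits a line containing 'Beginning' on entry and a line containing 'finished' plus the elapsed seconds on exit, defaults to print, and accepts an optional message and logging function (the handle() examples pass logger.info). A minimal sketch consistent with those tests; the project's real helper may differ in wording:

from contextlib import contextmanager
import time


@contextmanager
def timer(message='', logging_function=print):
    # Announce the start so the captured output contains 'Beginning'
    logging_function('Beginning {}...'.format(message))
    start = time.perf_counter()
    try:
        yield
    finally:
        # Report elapsed time in the '<float> sec' form the tests parse
        elapsed = time.perf_counter() - start
        logging_function('... {} finished ({} sec)'.format(message, elapsed))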
Example #5
    def handle(self, *args, **options):
        logger.info('Starting updates to award data...')

        all_records_flag = options.get('all')
        fiscal_year = options.get('fiscal_year')

        award_update_id_list = []
        award_contract_update_id_list = []

        if not all_records_flag:
            if fiscal_year:
                fiscal_year = fiscal_year[0]
                logger.info('Processing data for Fiscal Year ' +
                            str(fiscal_year))
            else:
                fiscal_year = 2017

            # Lists to store for update_awards and update_contract_awards
            award_update_id_list = TransactionNormalized.objects.filter(action_date__fy=fiscal_year).\
                values_list('award_id', flat=True)
            award_contract_update_id_list = TransactionFPDS.objects.filter(action_date__fy=fiscal_year).\
                values_list('transaction__award_id', flat=True)

        with timer(
                'updating awards to reflect their latest associated transaction info',
                logger.info):
            if all_records_flag:
                update_awards()
            else:
                update_awards(tuple(award_update_id_list))

        with timer(
                'updating contract-specific awards to reflect their latest transaction info...',
                logger.info):
            if all_records_flag:
                update_contract_awards()
            else:
                update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            if all_records_flag:
                update_award_categories()
            else:
                update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
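
Note: the action_date__fy=fiscal_year filters above depend on a custom 'fy' lookup registered on date fields; it is not built into Django. A minimal sketch of such a transform, assuming US federal fiscal years (Oct 1 through Sep 30) and PostgreSQL:

from django.db import models


class FiscalYear(models.Transform):
    lookup_name = 'fy'

    def as_sql(self, compiler, connection):
        lhs, params = compiler.compile(self.lhs)
        # Shifting a date three months forward pushes Oct-Dec into the next
        # calendar year, which matches federal fiscal-year numbering
        return "EXTRACT(YEAR FROM ({} + INTERVAL '3 months'))".format(lhs), params


models.DateField.register_lookup(FiscalYear)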
Example #6
    def handle(self, *args, **options):
        logger.info('Starting FPDS nightly data load...')

        if options.get('date'):
            date = options.get('date')[0]
            date = datetime.strptime(date, '%Y-%m-%d').date()
        else:
            data_load_date_obj = ExternalDataLoadDate.objects.filter(
                external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).first()
            if not data_load_date_obj:
                date = (datetime.utcnow() -
                        timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                date = data_load_date_obj.last_load_date
        start_date = datetime.utcnow().strftime('%Y-%m-%d')

        logger.info('Processing data for FPDS starting from %s' % date)

        with timer('retrieving/diff-ing FPDS Data', logger.info):
            to_insert, ids_to_delete = self.get_fpds_data(date=date)

        total_rows = len(to_insert)
        total_rows_delete = len(ids_to_delete)

        if total_rows_delete > 0:
            with timer('deleting stale FPDS data', logger.info):
                self.delete_stale_fpds(ids_to_delete=ids_to_delete)
        else:
            logger.info('Nothing to delete...')

        if total_rows > 0:
            with timer('inserting new FPDS data', logger.info):
                self.insert_new_fpds(to_insert=to_insert,
                                     total_rows=total_rows)

            with timer(
                    'updating awards to reflect their latest associated transaction info',
                    logger.info):
                update_awards(tuple(award_update_id_list))

            with timer(
                    'updating contract-specific awards to reflect their latest transaction info',
                    logger.info):
                update_contract_awards(tuple(award_update_id_list))

            with timer('updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...')

        # Update the date for the last time the data load was run
        ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
        ).delete()
        ExternalDataLoadDate(last_load_date=start_date,
                             external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

        logger.info('FPDS NIGHTLY UPDATE FINISHED!')
Example #7
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')
        save = options.get('save')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data location insert', logger.info):
                self.update_location_transaction_contract(
                    db_cursor=db_cursor,
                    fiscal_year=fiscal_year,
                    page=page,
                    limit=limit,
                    save=save)

        if not options['contracts']:
            with timer('D2 historical data location insert', logger.info):
                self.update_location_transaction_assistance(
                    db_cursor=db_cursor,
                    fiscal_year=fiscal_year,
                    page=page,
                    limit=limit,
                    save=save)

        logger.info('FINISHED')
Example #8
    def handle(self, *args, **options):
        logger.info('Starting historical data load...')

        db_cursor = connections['data_broker'].cursor()
        fiscal_year = options.get('fiscal_year')
        page = options.get('page')
        limit = options.get('limit')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        page = page[0] if page else 1
        limit = limit[0] if limit else 500000

        if not options['assistance']:
            with timer('D1 historical data load', logger.info):
                self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

        if not options['contracts']:
            with timer('D2 historical data load', logger.info):
                self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

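        # NOTE: award_update_id_list and award_contract_update_id_list are assumed
        # to be module-level lists populated by the update_transaction_* methods
        # above (they are not defined in this excerpt)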
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_contract_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Done!
        logger.info('FINISHED')
Example #9
    def handle(self, *args, **options):
        logger.info('Starting row deletion...')

        if options['batches']:
            limit = options['batches'] * options['batchsize']
        else:
            limit = None
        with timer('executing query', logger.info):
            cursor = self.fabs_cursor(limit)
        batch_no = 1
        while not options['batches'] or batch_no <= options['batches']:
            message = 'Batch {} of {} rows'.format(batch_no,
                                                   options['batchsize'])
            with timer(message, logger.info):
                rows = cursor.fetchmany(options['batchsize'])
            if not rows:
                logger.info('No further rows; finished')
                return
            ids = [r[0] for r in rows]
            with timer('deleting rows', logger.info):
                TransactionNormalized.objects.filter(
                    assistance_data__afa_generated_unique__in=ids).delete()
            batch_no += 1
        logger.info('{} batches finished, complete'.format(batch_no - 1))
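
Note: the fetchmany loop above is a reusable pattern. A hypothetical generator that factors it out (not part of the original command):

def iter_batches(cursor, batchsize, max_batches=None):
    # Yield lists of up to batchsize rows from a DB-API cursor until it is
    # exhausted, or until max_batches batches have been produced
    batch_no = 0
    while max_batches is None or batch_no < max_batches:
        rows = cursor.fetchmany(batchsize)
        if not rows:
            return
        yield rows
        batch_no += 1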
Example #10
    def handle(self, *args, **options):
        """
        Updates the column ussgl498100_upward_adjust_pri_deliv_orders_oblig_unpaid_cpe due to the incorrect
        mapping in settings.py
        """
        ds_cursor = connection.cursor()
        logger.info('Begin updating File B and File C')

        broker_cols_b = self.get_list_of_broker_cols(financial_accounts_oc)
        broker_cols_type_b = self.get_list_of_broker_cols_types(financial_accounts_oc)
        website_cols_b = self.get_website_row_formatted(financial_accounts_oc)
        website_update_text_b = self.get_cols_to_update(financial_accounts_oc)
        website_cols_joins_b = self.get_file_table_joins(financial_accounts_oc)

        broker_cols_c = self.get_list_of_broker_cols(financial_accounts_awards)
        broker_cols_type_c = self.get_list_of_broker_cols_types(financial_accounts_awards)
        website_cols_c = self.get_website_row_formatted(financial_accounts_awards)
        website_update_text_c = self.get_cols_to_update(financial_accounts_awards)
        website_cols_joins_c = self.get_file_table_joins(financial_accounts_awards)

        with timer('getting submission ids to update', logger.info):
            submissions_to_update = self.get_list_of_submissions()

        for submission in submissions_to_update:
            submission_id = submission[0]

            # File B Updates
            logger.info('loading rows data to update File B submission {}'.format(submission_id))

            with timer('retrieving rows to update for File B submission {}'.format(submission_id), logger.info):
                get_rows_to_update_query = self.get_rows_to_update('B', submission_id,
                                                                   broker_cols_b, broker_cols_type_b,
                                                                   website_cols_b)
                ds_cursor.execute(get_rows_to_update_query)

            with timer('updating rows for File B submission {}'.format(submission_id), logger.info):
                update_rows = self.update_website_rows(
                    'financial_accounts_by_program_activity_object_class',
                    'file_b_rows_to_update', website_update_text_b, website_cols_joins_b
                )

                ds_cursor.execute(update_rows)

            # File C updates
            with timer('retrieving rows to update for File C submission {}'.format(submission_id), logger.info):
                get_rows_to_update_query = self.get_rows_to_update(
                    'C',
                    submission_id,
                    broker_cols_c,
                    broker_cols_type_c,
                    website_cols_c)
                ds_cursor.execute(get_rows_to_update_query)

            with timer('updating rows for File C submission {}'.format(submission_id), logger.info):
                update_rows = self.update_website_rows(
                    'financial_accounts_by_awards', 'file_c_rows_to_update', website_update_text_c, website_cols_joins_c
                )
                ds_cursor.execute(update_rows)

            ds_cursor.execute("DROP TABLE file_b_rows_to_update")
            ds_cursor.execute("DROP TABLE file_c_rows_to_update")

        logger.info('Done updating File B and File C mappings')
Example #11
    def handle(self, *args, **options):
        logger.info('Starting FABS bulk data load...')

        db_cursor = connections['data_broker'].cursor()
        ds_cursor = connection.cursor()
        fiscal_year = options.get('fiscal_year')

        if fiscal_year:
            fiscal_year = fiscal_year[0]
        else:
            fiscal_year = 2017

        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

        with timer('Diff-ing FABS data', logger.info):
            to_insert, to_delete = self.diff_fabs_data(db_cursor=db_cursor,
                                                       ds_cursor=ds_cursor,
                                                       fiscal_year=fiscal_year)

        total_rows = len(to_insert)
        total_rows_delete = len(to_delete)

        if total_rows_delete > 0:
            with timer('Deleting stale FABS data', logger.info):
                self.delete_stale_fabs(to_delete=to_delete)

        if total_rows > 0:
            # Set lookups after deletions to only get latest
            self.set_lookup_maps()

            with timer('Get Broker FABS data', logger.info):
                fabs_broker_data = self.get_fabs_data(db_cursor=db_cursor,
                                                      fiscal_year=fiscal_year,
                                                      to_insert=to_insert)

            with timer('Loading POP Location data...', logger.info):
                self.load_locations(fabs_broker_data=fabs_broker_data,
                                    total_rows=total_rows,
                                    pop_flag=True)

            with timer('Loading LE Location data', logger.info):
                self.load_locations(fabs_broker_data=fabs_broker_data,
                                    total_rows=total_rows)

            with timer('Loading Legal Entity data', logger.info):
                self.load_legal_entity(fabs_broker_data=fabs_broker_data,
                                       total_rows=total_rows)

            with timer('Loading Award data', logger.info):
                self.load_awards(fabs_broker_data=fabs_broker_data,
                                 total_rows=total_rows)

            with timer('Loading Transaction Normalized data', logger.info):
                self.load_transaction_normalized(
                    fabs_broker_data=fabs_broker_data, total_rows=total_rows)

            with timer('Loading Transaction FABS data', logger.info):
                self.load_transaction_fabs(fabs_broker_data, total_rows)

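            # NOTE: award_lookup is assumed to be a module-level list populated
            # by load_awards above (it is not defined in this excerpt)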
            award_update_id_list = [award.id for award in award_lookup]

            with timer(
                    'Updating awards to reflect their latest associated transaction info',
                    logger.info):
                update_awards(tuple(award_update_id_list))

            with timer('Updating award category variables', logger.info):
                update_award_categories(tuple(award_update_id_list))
        else:
            logger.info('Nothing to insert...FINISHED!')