def handle(self, *args, **options): logger.info("Creating broker cursor") broker_cursor = connections["data_broker"].cursor() logger.info("Running MONTH_SCHEDULE_SQL") broker_cursor.execute(MONTH_SCHEDULE_SQL) logger.info("Getting month schedule values from cursor") month_schedule_values = dictfetchall(broker_cursor) logger.info("Running QUARTER_SCHEDULE_SQL") broker_cursor.execute(QUARTER_SCHEDULE_SQL) logger.info("Getting quarter schedule values from cursor") quarter_schedule_values = dictfetchall(broker_cursor) logger.info("Deleting existing DABS Submission Window Schedule") DABSSubmissionWindowSchedule.objects.all().delete() logger.info("Inserting DABS Submission Window Schedule into website") submission_schedule_objs = [ DABSSubmissionWindowSchedule(**values) for values in month_schedule_values ] submission_schedule_objs += [ DABSSubmissionWindowSchedule(**values) for values in quarter_schedule_values ] DABSSubmissionWindowSchedule.objects.bulk_create( submission_schedule_objs) logger.info( "DABS Submission Window Schedule loader finished successfully!")
def diff_fpds_data(self, db_cursor, ds_cursor, fiscal_year=None):
    db_query = 'SELECT detached_award_procurement_id FROM detached_award_procurement'
    db_arguments = []

    ds_query = 'SELECT detached_award_procurement_id FROM transaction_fpds'
    ds_arguments = []

    if fiscal_year:
        if db_arguments:
            db_query += ' AND'
        else:
            db_query += ' WHERE'

        if ds_arguments:
            ds_query += ' AND'
        else:
            ds_query += ' WHERE'

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        db_query += ' action_date::Date BETWEEN %s AND %s'
        db_arguments += [fy_begin, fy_end]

        ds_query += ' action_date::Date BETWEEN %s AND %s'
        ds_arguments += [fy_begin, fy_end]

    db_cursor.execute(db_query, db_arguments)
    ds_cursor.execute(ds_query, ds_arguments)

    db_dict = dictfetchall(db_cursor)
    ds_dict = dictfetchall(ds_cursor)

    db_set = set(map(lambda db_entry: int(db_entry['detached_award_procurement_id']), db_dict))
    ds_set = set(map(lambda ds_entry: int(ds_entry['detached_award_procurement_id']), ds_dict))

    to_insert = db_set - ds_set
    to_delete = ds_set - db_set

    logger.info('Number of records to insert: %s' % str(len(to_insert)))
    logger.info('Number of records to delete: %s' % str(len(to_delete)))

    # Return what is not currently in our database (to insert) and what we have that Broker does not (to delete)
    return to_insert, to_delete
def get_treasury_appropriation_account_tas_lookup(tas_lookup_id, db_cursor):
    """Get the matching TAS object from the broker database and save it to our running list."""
    if tas_lookup_id in TAS_ID_TO_ACCOUNT:
        return TAS_ID_TO_ACCOUNT[tas_lookup_id]

    # Checks the broker DB tas_lookup table for the tas_id and returns the matching TAS object in the datastore
    db_cursor.execute(
        "SELECT * FROM tas_lookup WHERE (financial_indicator2 <> 'F' OR financial_indicator2 IS NULL) "
        "AND account_num = %s", [tas_lookup_id])
    tas_data = dictfetchall(db_cursor)

    if tas_data is None or len(tas_data) == 0:
        return None, 'Account number {} not found in Broker'.format(tas_lookup_id)

    tas_rendering_label = TreasuryAppropriationAccount.generate_tas_rendering_label(
        ata=tas_data[0]["allocation_transfer_agency"],
        aid=tas_data[0]["agency_identifier"],
        typecode=tas_data[0]["availability_type_code"],
        bpoa=tas_data[0]["beginning_period_of_availa"],
        epoa=tas_data[0]["ending_period_of_availabil"],
        mac=tas_data[0]["main_account_code"],
        sub=tas_data[0]["sub_account_code"]
    )

    TAS_ID_TO_ACCOUNT[tas_lookup_id] = (
        TreasuryAppropriationAccount.objects.filter(tas_rendering_label=tas_rendering_label).first(),
        tas_rendering_label)
    return TAS_ID_TO_ACCOUNT[tas_lookup_id]
def get_treasury_appropriation_account_tas_lookup(tas_lookup_id, db_cursor):
    """Get the matching TAS object from the broker database and save it to our running list."""
    if tas_lookup_id in TAS_ID_TO_ACCOUNT:
        return TAS_ID_TO_ACCOUNT[tas_lookup_id]

    # Checks the broker DB tas_lookup table for the tas_id and returns the matching TAS object in the datastore
    db_cursor.execute(
        "SELECT * FROM tas_lookup WHERE (financial_indicator2 <> 'F' OR financial_indicator2 IS NULL) "
        "AND account_num = %s",
        [tas_lookup_id],
    )
    tas_data = dictfetchall(db_cursor)

    if tas_data is None or len(tas_data) == 0:
        return None, "Account number {} not found in Broker".format(tas_lookup_id)

    tas_rendering_label = TreasuryAppropriationAccount.generate_tas_rendering_label(
        ata=tas_data[0]["allocation_transfer_agency"],
        aid=tas_data[0]["agency_identifier"],
        typecode=tas_data[0]["availability_type_code"],
        bpoa=tas_data[0]["beginning_period_of_availa"],
        epoa=tas_data[0]["ending_period_of_availabil"],
        mac=tas_data[0]["main_account_code"],
        sub=tas_data[0]["sub_account_code"],
    )

    TAS_ID_TO_ACCOUNT[tas_lookup_id] = (
        TreasuryAppropriationAccount.objects.filter(tas_rendering_label=tas_rendering_label).first(),
        tas_rendering_label,
    )
    return TAS_ID_TO_ACCOUNT[tas_lookup_id]
def get_award_data(db_cursor, award_type, max_id, internal_ids=None):
    """
    Gets data for all new awards from broker with ID greater than the ones already stored
    for the given award type
    """
    query_columns = ['internal_id']

    # we need different columns depending on if it's a procurement or a grant
    if award_type == 'procurement':
        query_columns.extend([
            'contract_number',
            'idv_reference_number',
            'contracting_office_aid',
            'contract_agency_code',
            'contract_idv_agency_code',
        ])
    else:
        # TODO contracting_office_aid equivalent? Do we even need it?
        query_columns.extend(['fain'])

    if isinstance(internal_ids, list) and len(internal_ids) > 0:
        ids_string = ','.join([str(id).lower() for id in internal_ids])
        query = "SELECT {} FROM fsrs_{} WHERE internal_id = ANY(\'{{{}}}\'::text[]) ORDER BY id".format(
            ",".join(query_columns), award_type, ids_string)
    else:
        query = "SELECT {} FROM fsrs_{} WHERE id > {} ORDER BY id".format(
            ",".join(query_columns), award_type, str(max_id))

    db_cursor.execute(query)
    return dictfetchall(db_cursor)
def get_treasury_appropriation_account_tas_lookup(tas_lookup_id, db_cursor):
    """Get the matching TAS object from the broker database and save it to our running list."""
    if tas_lookup_id in TAS_ID_TO_ACCOUNT:
        return TAS_ID_TO_ACCOUNT[tas_lookup_id]

    # Checks the broker DB tas_lookup table for the tas_id and returns the matching TAS object in the datastore
    db_cursor.execute('SELECT * FROM tas_lookup WHERE account_num = %s', [tas_lookup_id])
    tas_data = dictfetchall(db_cursor)

    # These `or ""` convert from None to a blank string, which is how the TAS table stores nulls
    q_kwargs = {
        "allocation_transfer_agency_id": tas_data[0]["allocation_transfer_agency"] or "",
        "agency_id": tas_data[0]["agency_identifier"] or "",
        "beginning_period_of_availability": tas_data[0]["beginning_period_of_availa"] or "",
        "ending_period_of_availability": tas_data[0]["ending_period_of_availabil"] or "",
        "availability_type_code": tas_data[0]["availability_type_code"] or "",
        "main_account_code": tas_data[0]["main_account_code"] or "",
        "sub_account_code": tas_data[0]["sub_account_code"] or ""
    }

    TAS_ID_TO_ACCOUNT[tas_lookup_id] = TreasuryAppropriationAccount.objects.filter(Q(**q_kwargs)).first()
    return TAS_ID_TO_ACCOUNT[tas_lookup_id]
def get_fabs_data(date):
    db_cursor = connections['data_broker'].cursor()

    # The ORDER BY is important here because deletions must happen in a specific order and that order is defined
    # by the Broker's PK since every modification is a new row
    db_query = 'SELECT * ' \
               'FROM published_award_financial_assistance ' \
               'WHERE created_at >= %s ' \
               'AND (is_active IS True OR UPPER(correction_delete_indicatr) = \'D\')'
    db_args = [date]

    db_cursor.execute(db_query, db_args)
    db_rows = dictfetchall(db_cursor)  # this returns an OrderedDict

    ids_to_delete = []
    final_db_rows = []

    # Iterate through the result dict and determine what needs to be deleted and what needs to be added
    for row in db_rows:
        if row['correction_delete_indicatr'] and row['correction_delete_indicatr'].upper() == 'D':
            ids_to_delete.append(row['afa_generated_unique'].upper())
        else:
            final_db_rows.append(row)

    logger.info('Number of records to insert/update: %s' % str(len(final_db_rows)))
    logger.info('Number of records to delete: %s' % str(len(ids_to_delete)))

    return final_db_rows, ids_to_delete
def get_broker_submission(self):
    self.db_cursor.execute(
        f"""
        select
            s.submission_id,
            (
                select  max(updated_at)
                from    publish_history
                where   submission_id = s.submission_id
            )::timestamptz as published_date,
            (
                select  max(updated_at)
                from    certify_history
                where   submission_id = s.submission_id
            )::timestamptz as certified_date,
            coalesce(s.cgac_code, s.frec_code) as toptier_code,
            s.reporting_start_date,
            s.reporting_end_date,
            s.reporting_fiscal_year,
            s.reporting_fiscal_period,
            s.is_quarter_format,
            s.d2_submission,
            s.publish_status_id
        from
            submission as s
        where
            s.submission_id = %s
        """,
        [self.submission_id],
    )

    return dictfetchall(self.db_cursor)
def gather_new_duns(self, db_cursor, update_date, latest_broker_duns_id):
    new_duns_query = "SELECT * FROM duns " \
                     "WHERE updated_at > \'" + str(update_date) + "\' AND " \
                     "duns_id > " + str(latest_broker_duns_id)
    logger.info('Gathering duns created since last update')
    db_cursor.execute(new_duns_query)
    new_duns = dictfetchall(db_cursor)

    update_duns_query = "SELECT * FROM duns " \
                        "WHERE updated_at > \'" + str(update_date) + "\' AND " \
                        "duns_id <= " + str(latest_broker_duns_id)
    logger.info('Gathering duns updated since last update')
    db_cursor.execute(update_duns_query)
    update_duns = dictfetchall(db_cursor)

    return new_duns, update_duns
def get_fpds_data(self, db_cursor, fiscal_year=None, to_insert=None):
    query = 'SELECT * FROM detached_award_procurement'
    arguments = []

    if to_insert:
        if arguments:
            query += ' AND'
        else:
            query += ' WHERE'
        query += ' detached_award_procurement_id IN %s'
        arguments += [tuple(to_insert)]

    if fiscal_year:
        if arguments:
            query += ' AND'
        else:
            query += ' WHERE'
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin, fy_end]

    query += ' ORDER BY detached_award_procurement_id'

    logger.info("Executing select query on Broker DB")
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    return dictfetchall(db_cursor)
def tas_ids(self):
    sql = f"""
        select distinct c.tas_id
        {self.get_from_where(self.submission_attributes.submission_id)}
        and c.tas_id is not null
    """
    self.db_cursor.execute(sql)
    return dictfetchall(self.db_cursor)
def diff_fpds_data(self, db_cursor, ds_cursor, fiscal_year=None):
    db_query = 'SELECT detached_award_procurement_id FROM detached_award_procurement'
    db_arguments = []

    ds_query = 'SELECT detached_award_procurement_id FROM transaction_fpds'
    ds_arguments = []

    if fiscal_year:
        if db_arguments:
            db_query += ' AND'
        else:
            db_query += ' WHERE'

        if ds_arguments:
            ds_query += ' AND'
        else:
            ds_query += ' WHERE'

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        db_query += ' action_date::Date BETWEEN %s AND %s'
        db_arguments += [fy_begin, fy_end]

        ds_query += ' action_date::Date BETWEEN %s AND %s'
        ds_arguments += [fy_begin, fy_end]

    db_cursor.execute(db_query, db_arguments)
    ds_cursor.execute(ds_query, ds_arguments)

    db_dict = dictfetchall(db_cursor)
    ds_dict = dictfetchall(ds_cursor)

    db_set = set(map(lambda db_entry: int(db_entry['detached_award_procurement_id']), db_dict))
    ds_set = set(map(lambda ds_entry: int(ds_entry['detached_award_procurement_id']), ds_dict))

    to_insert = db_set - ds_set
    to_delete = ds_set - db_set

    logger.info('Number of records to insert: %s' % str(len(to_insert)))
    logger.info('Number of records to delete: %s' % str(len(to_delete)))

    # Return what is not currently in our database (to insert) and what we have that Broker does not (to delete)
    return to_insert, to_delete
def load_executive_compensation(db_cursor, date, start_date):
    logger.info("Getting DUNS/Exec Comp data from broker based on the last pull date of %s..." % str(date))

    # Get first page
    db_cursor.execute(EXEC_COMP_QUERY, [date])
    exec_comp_query_dict = dictfetchall(db_cursor)

    total_rows = len(exec_comp_query_dict)
    logger.info('Updating Executive Compensation Data, {} rows coming from the Broker...'.format(total_rows))

    start_time = datetime.now(timezone.utc)

    for index, row in enumerate(exec_comp_query_dict, 1):
        if not (index % 100):
            # start_time is timezone-aware, so the elapsed time must be computed against an aware "now" as well
            logger.info('Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now(timezone.utc) - start_time))

        leo_update_dict = {
            "officer_1_name": row['high_comp_officer1_full_na'],
            "officer_1_amount": row['high_comp_officer1_amount'],
            "officer_2_name": row['high_comp_officer2_full_na'],
            "officer_2_amount": row['high_comp_officer2_amount'],
            "officer_3_name": row['high_comp_officer3_full_na'],
            "officer_3_amount": row['high_comp_officer3_amount'],
            "officer_4_name": row['high_comp_officer4_full_na'],
            "officer_4_amount": row['high_comp_officer4_amount'],
            "officer_5_name": row['high_comp_officer5_full_na'],
            "officer_5_amount": row['high_comp_officer5_amount'],
        }

        any_data = False
        for attr, value in leo_update_dict.items():
            if value and value != "":
                any_data = True
                break

        if not any_data:
            continue

        duns_number = row['awardee_or_recipient_uniqu']

        # Deal with multiples that we have in our LE table
        legal_entities = LegalEntity.objects.filter(recipient_unique_id=duns_number)
        if not legal_entities.exists():
            logger.info('No record in data store for DUNS {}. Skipping...'.format(duns_number))

        for le in legal_entities:
            leo, _ = LegalEntityOfficers.objects.get_or_create(legal_entity=le)
            for attr, value in leo_update_dict.items():
                if value == "":
                    value = None
                setattr(leo, attr, value)
            leo.save()

    # Update the date for the last time the data load was run
    update_last_load_date("exec_comp", start_date)
def get_broker_submission(self):
    self.db_cursor.execute(
        f"""
        with publish_certify_history as (
            select
                distinct_pairings.submission_id,
                jsonb_agg(
                    jsonb_build_object(
                        'published_date', ph.updated_at::timestamptz,
                        'certified_date', ch.updated_at::timestamptz
                    )
                ) AS history
            from
                (
                    select distinct
                        submission_id,
                        publish_history_id,
                        certify_history_id
                    from
                        published_files_history
                    where
                        submission_id = %s
                ) as distinct_pairings
            left outer join publish_history as ph using (publish_history_id)
            left outer join certify_history as ch using (certify_history_id)
            group by distinct_pairings.submission_id
        )
        select
            s.submission_id,
            (
                select  max(updated_at)
                from    publish_history
                where   submission_id = s.submission_id
            )::timestamptz as published_date,
            (
                select  max(updated_at)
                from    certify_history
                where   submission_id = s.submission_id
            )::timestamptz as certified_date,
            coalesce(s.cgac_code, s.frec_code) as toptier_code,
            s.reporting_start_date,
            s.reporting_end_date,
            s.reporting_fiscal_year,
            s.reporting_fiscal_period,
            s.is_quarter_format,
            s.d2_submission,
            s.publish_status_id,
            pch.history
        from
            submission as s
        inner join publish_certify_history as pch using (submission_id)
        """,
        [self.submission_id],
    )

    return dictfetchall(self.db_cursor)
def bulk_treasury_appropriation_account_tas_lookup(rows, db_cursor):
    # Eliminate nulls, TAS we already know about, and remove duplicates.
    tas_lookup_ids = tuple(set(r["tas_id"] for r in rows if (r["tas_id"] and r["tas_id"] not in TAS_ID_TO_ACCOUNT)))
    if not tas_lookup_ids:
        return

    db_cursor.execute(
        """
        select distinct
            account_num,
            allocation_transfer_agency,
            agency_identifier,
            availability_type_code,
            beginning_period_of_availa,
            ending_period_of_availabil,
            main_account_code,
            sub_account_code
        from
            tas_lookup
        where
            account_num in %s and
            (financial_indicator2 != 'F' or financial_indicator2 is null)
        """,
        [tas_lookup_ids],
    )
    tas_data = dictfetchall(db_cursor)

    tas_rendering_labels = {
        tas["account_num"]: TreasuryAppropriationAccount.generate_tas_rendering_label(
            ata=tas["allocation_transfer_agency"],
            aid=tas["agency_identifier"],
            typecode=tas["availability_type_code"],
            bpoa=tas["beginning_period_of_availa"],
            epoa=tas["ending_period_of_availabil"],
            mac=tas["main_account_code"],
            sub=tas["sub_account_code"],
        )
        for tas in tas_data
    }

    taa_objects = {
        taa.tas_rendering_label: taa
        for taa in TreasuryAppropriationAccount.objects.filter(tas_rendering_label__in=tas_rendering_labels.values())
    }

    TAS_ID_TO_ACCOUNT.update({
        tid: (taa_objects.get(tas_rendering_labels.get(tid)), tas_rendering_labels.get(tid))
        for tid in tas_lookup_ids
    })
def handle(self, *args, **options):
    # Grab the data broker database connections
    if not options['test']:
        try:
            db_conn = connections['data_broker']
            db_cursor = db_conn.cursor()
        except Exception as err:
            logger.critical('Could not connect to database. Is DATA_BROKER_DATABASE_URL set?')
            logger.critical(err)
            return
    else:
        db_cursor = PhonyCursor()

    ds_cursor = connection.cursor()

    logger.info('Creating a temporary Office table copied from the Broker...')
    db_cursor.execute('SELECT office_name, office_code FROM office')
    all_offices = dictfetchall(db_cursor)

    all_offices_list = []
    for o in all_offices:
        office_name = o['office_name'].replace("'", "''")
        office_code = o['office_code'].replace("'", "''")
        all_offices_list.append("('" + office_name + "','" + office_code + "')")
    all_offices_str = ', '.join(all_offices_list)

    ds_cursor.execute('CREATE TABLE temp_broker_office (office_name TEXT, office_code TEXT)')
    ds_cursor.execute('INSERT INTO temp_broker_office (office_name, office_code) VALUES ' + all_offices_str)

    logger.info('Deriving FABS awarding_office_names with awarding_office_codes from the temporary Office table...')
    ds_cursor.execute(
        "UPDATE transaction_fabs AS t_fabs "
        "SET awarding_office_name = office.office_name "
        "FROM temp_broker_office AS office "
        "WHERE t_fabs.awarding_office_code = office.office_code "
        "    AND t_fabs.action_date >= '2018-10-01' "
        "    AND t_fabs.awarding_office_name IS NULL "
        "    AND t_fabs.awarding_office_code IS NOT NULL")
    logger.info(ds_cursor.rowcount)
    # logger.info('Made changes to {} records'.format(ds_cursor.results))

    logger.info('Deriving FABS funding_office_names with funding_office_codes from the temporary Office table...')
    ds_cursor.execute(
        "UPDATE transaction_fabs AS t_fabs "
        "SET funding_office_name = office.office_name "
        "FROM temp_broker_office AS office "
        "WHERE t_fabs.funding_office_code = office.office_code "
        "    AND t_fabs.action_date >= '2018-10-01' "
        "    AND t_fabs.funding_office_name IS NULL "
        "    AND t_fabs.funding_office_code IS NOT NULL")
    logger.info(ds_cursor.rowcount)
    # logger.info('Made changes to {} records'.format(ds_cursor.results))

    logger.info('Dropping temporary Office table...')
    ds_cursor.execute('DROP TABLE temp_broker_office')

    logger.info('Finished derivations.')
def generate_schedules_from_broker(self):
    logger.info("Creating broker cursor")
    broker_cursor = connections["data_broker"].cursor()

    logger.info("Running MONTH_SCHEDULE_SQL")
    broker_cursor.execute(MONTH_SCHEDULE_SQL)

    logger.info("Getting month schedule values from cursor")
    month_schedule_values = dictfetchall(broker_cursor)

    logger.info("Running QUARTER_SCHEDULE_SQL")
    broker_cursor.execute(QUARTER_SCHEDULE_SQL)

    logger.info("Getting quarter schedule values from cursor")
    quarter_schedule_values = dictfetchall(broker_cursor)

    submission_schedule_objs = [DABSSubmissionWindowSchedule(**values) for values in month_schedule_values]
    submission_schedule_objs += [DABSSubmissionWindowSchedule(**values) for values in quarter_schedule_values]

    return submission_schedule_objs
def fetch_fpds_data_generator(dap_uid_list):
    start_time = datetime.now()
    db_cursor = connections["data_broker"].cursor()

    db_query = "SELECT * FROM detached_award_procurement WHERE detached_award_procurement_id IN ({});"

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fpds_ids_batch = dap_uid_list[i:max_index]

        log_msg = "[{}] Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(datetime.now() - start_time, i, max_index, total_uid_count))

        db_cursor.execute(db_query.format(",".join(str(id) for id in fpds_ids_batch)))
        yield dictfetchall(db_cursor)  # this returns an OrderedDict
def handle(self, *args, **options):
    logger.info('Creating broker cursor')
    broker_cursor = connections['data_broker'].cursor()

    logger.info('Running TOTAL_OBLIGATION_SQL')
    broker_cursor.execute(TOTAL_OBLIGATION_SQL)

    logger.info('Getting total obligation values from cursor')
    total_obligation_values = dictfetchall(broker_cursor)

    logger.info('Deleting all existing GTAS total obligation records in website')
    GTASTotalObligation.objects.all().delete()

    logger.info('Inserting GTAS total obligations records into website')
    total_obligation_objs = [GTASTotalObligation(**values) for values in total_obligation_values]
    GTASTotalObligation.objects.bulk_create(total_obligation_objs)

    logger.info('GTAS loader finished successfully!')
def broker_data(self, db_cursor, table_name, options):
    """Applies user-selected filters and gets rows from appropriate broker-side table"""
    filter_sql = []
    filter_values = []
    for (column, filter) in (
        ('action_date_begin', ' AND (action_date IS NOT NULL) AND CAST(action_date AS DATE) >= %s'),
        ('action_date_end', ' AND (action_date IS NOT NULL) AND CAST(action_date AS DATE) <= %s'),
        ('cgac', ' AND awarding_agency_code = %s'),
    ):
        if options[column]:
            filter_sql.append(filter)
            filter_values.append(options[column])
    filter_sql = "\n".join(filter_sql)
    sql = 'SELECT * FROM {} WHERE true {}'.format(table_name, filter_sql)
    db_cursor.execute(sql, filter_values)
    results = dictfetchall(db_cursor)
    logger.info('Acquired {}, there are {} rows.'.format(table_name, len(results)))
    return results
def fetch_fpds_data_generator(dap_uid_list):
    start_time = datetime.now()
    db_cursor = connections["data_broker"].cursor()

    db_query = "SELECT * FROM detached_award_procurement WHERE detached_award_procurement_id IN ({});"

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fpds_ids_batch = dap_uid_list[i:max_index]

        log_msg = "[{}] Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(datetime.now() - start_time, i + 1, max_index, total_uid_count))

        db_cursor.execute(db_query.format(",".join(str(id) for id in fpds_ids_batch)))
        yield dictfetchall(db_cursor)  # this returns an OrderedDict
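
# Hypothetical usage sketch (not from the source): each iteration of the batch generators above
# yields one list of Broker rows as dicts, so a caller can process BATCH_FETCH_SIZE records at a
# time without holding the full result set in memory. `upsert_fpds_batch` is a made-up name for
# whatever per-batch processing the caller does.
def upsert_fpds_batch(rows):
    ...  # e.g. transform and bulk-insert the batch into the website database


for batch in fetch_fpds_data_generator(dap_uid_list=[101, 102, 103]):
    upsert_fpds_batch(batch)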
def fetch_fabs_data_generator(dap_uid_list):
    db_cursor = connections["data_broker"].cursor()
    db_query = """
        SELECT * FROM published_award_financial_assistance
        WHERE published_award_financial_assistance_id IN %s;
    """

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        start_time = time.perf_counter()
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fabs_ids_batch = dap_uid_list[i:max_index]

        log_msg = "Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(i + 1, max_index, total_uid_count))

        db_cursor.execute(db_query, [tuple(fabs_ids_batch)])
        logger.info("Fetching records took {:.2f}s".format(time.perf_counter() - start_time))
        yield dictfetchall(db_cursor)
def load_executive_compensation(db_cursor, duns_list=None):
    """
    Loads File E from the broker. db_cursor should be the db_cursor for Broker
    """
    if duns_list is None:
        duns_list = list(
            set(
                LegalEntity.objects.all()
                .exclude(recipient_unique_id__isnull=True)
                .values_list("recipient_unique_id", flat=True)
            )
        )

    duns_list = [str(x) for x in duns_list]

    # File E
    db_cursor.execute(FILE_E_QUERY, [tuple(duns_list)])
    e_data = dictfetchall(db_cursor)
    logger.info("Updating Executive Compensation, entries: {}".format(len(e_data)))

    for row in e_data:
        leo_update_dict = {
            "officer_1_name": row['high_comp_officer1_full_na'],
            "officer_1_amount": row['high_comp_officer1_amount'],
            "officer_2_name": row['high_comp_officer2_full_na'],
            "officer_2_amount": row['high_comp_officer2_amount'],
            "officer_3_name": row['high_comp_officer3_full_na'],
            "officer_3_amount": row['high_comp_officer3_amount'],
            "officer_4_name": row['high_comp_officer4_full_na'],
            "officer_4_amount": row['high_comp_officer4_amount'],
            "officer_5_name": row['high_comp_officer5_full_na'],
            "officer_5_amount": row['high_comp_officer5_amount'],
        }

        leo = LegalEntityOfficers.objects.get(legal_entity__recipient_unique_id=row['awardee_or_recipient_uniqu'])
        for attr, value in leo_update_dict.items():
            if value == "":
                value = None
            setattr(leo, attr, value)
        leo.save()
def handle(self, *args, **options):
    logger.info('Creating broker cursor')
    broker_cursor = connections['data_broker'].cursor()

    logger.info('Running TOTAL_OBLIGATION_SQL')
    broker_cursor.execute(TOTAL_OBLIGATION_SQL)

    logger.info('Getting total obligation values from cursor')
    total_obligation_values = dictfetchall(broker_cursor)

    logger.info('Deleting all existing GTAS total obligation records in website')
    GTASTotalObligation.objects.all().delete()

    logger.info('Inserting GTAS total obligations records into website')
    total_obligation_objs = [GTASTotalObligation(**values) for values in total_obligation_values]
    GTASTotalObligation.objects.bulk_create(total_obligation_objs)

    logger.info('GTAS loader finished successfully!')
def fetch_fabs_data_generator(dap_uid_list):
    db_cursor = connections["data_broker"].cursor()
    db_query = """
        SELECT * FROM published_award_financial_assistance
        WHERE published_award_financial_assistance_id IN %s;
    """

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        start_time = time.perf_counter()
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fabs_ids_batch = dap_uid_list[i:max_index]

        log_msg = "Fetching {}-{} out of {} records from broker"
        logger.info(log_msg.format(i + 1, max_index, total_uid_count))

        db_cursor.execute(db_query, [tuple(fabs_ids_batch)])
        logger.info("Fetching records took {:.2f}s".format(time.perf_counter() - start_time))
        yield dictfetchall(db_cursor)
def get_fabs_data(self, db_cursor, fiscal_year=None, to_insert=None):
    query = 'SELECT * FROM published_award_financial_assistance WHERE is_active=TRUE'
    arguments = []

    if to_insert:
        query += ' AND published_award_financial_assistance_id IN %s'
        arguments += [tuple(to_insert)]

    if fiscal_year:
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += ' AND action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin, fy_end]

    query += ' ORDER BY published_award_financial_assistance_id'

    logger.info("Executing select query on Broker DB")
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    return dictfetchall(db_cursor)
def handle(self, *args, **options): logger.info("Creating broker cursor") broker_cursor = connections["data_broker"].cursor() logger.info("Running TOTAL_OBLIGATION_SQL") broker_cursor.execute(self.broker_fetch_sql()) logger.info("Getting total obligation values from cursor") total_obligation_values = dictfetchall(broker_cursor) logger.info( "Deleting all existing GTAS total obligation records in website") GTASSF133Balances.objects.all().delete() logger.info("Inserting GTAS total obligations records into website") total_obligation_objs = [ GTASSF133Balances(**values) for values in total_obligation_values ] GTASSF133Balances.objects.bulk_create(total_obligation_objs) self._execute_dml_sql(self.tas_fk_sql(), "Populating TAS foreign keys") logger.info("GTAS loader finished successfully!")
def process_data(self):
    broker_cursor = connections["data_broker"].cursor()

    logger.info("Extracting data from Broker")
    broker_cursor.execute(self.broker_fetch_sql)
    total_obligation_values = dictfetchall(broker_cursor)

    logger.info("Deleting all existing GTAS total obligation records in website")
    deletes = GTASSF133Balances.objects.all().delete()
    logger.info(f"Deleted {deletes[0]:,} records")

    logger.info("Transforming new GTAS records")
    total_obligation_objs = [GTASSF133Balances(**values) for values in total_obligation_values]

    logger.info("Loading new GTAS records into database")
    new_rec_count = len(GTASSF133Balances.objects.bulk_create(total_obligation_objs))
    logger.info(f"Loaded: {new_rec_count:,} records")

    load_rec = self._execute_dml_sql(self.tas_fk_sql, "Populating TAS foreign keys")
    logger.info(f"Set {load_rec:,} TAS FKs in GTAS table, {new_rec_count - load_rec:,} NULLs")

    delete_rec = self._execute_dml_sql(self.financing_account_sql, "Drop Financing Account TAS")
    logger.info(f"Deleted {delete_rec:,} records in GTAS table due to invalid TAS")

    logger.info("Committing transaction to database")
def get_award_data(db_cursor, award_type, max_id):
    """
    Gets data for all new awards from broker with ID greater than the ones already stored
    for the given award type
    """
    query_columns = ['internal_id']

    # we need different columns depending on if it's a procurement or a grant
    if award_type == 'procurement':
        query_columns.extend(['contract_number', 'idv_reference_number', 'contracting_office_aid',
                              'contract_agency_code', 'contract_idv_agency_code'])
    else:
        # TODO contracting_office_aid equivalent? Do we even need it?
        query_columns.extend(['fain'])

    query = "SELECT " + ",".join(query_columns) + " FROM fsrs_" + award_type + \
            " WHERE id > " + str(max_id) + " ORDER BY id"

    db_cursor.execute(query)

    logger.info("Running dictfetchall on db_cursor")
    return dictfetchall(db_cursor)
def fetch_fabs_data_generator(dap_uid_list):
    db_cursor = connection.cursor()
    db_query = """
        SELECT * FROM source_assistance_transaction
        WHERE published_award_financial_assistance_id IN %s;
    """

    total_uid_count = len(dap_uid_list)

    for i in range(0, total_uid_count, BATCH_FETCH_SIZE):
        start_time = time.perf_counter()
        max_index = i + BATCH_FETCH_SIZE if i + BATCH_FETCH_SIZE < total_uid_count else total_uid_count
        fabs_ids_batch = dap_uid_list[i:max_index]

        logger.info(f"Fetching {i + 1}-{max_index} out of {total_uid_count} records from source table")

        db_cursor.execute(db_query, [tuple(fabs_ids_batch)])
        logger.info("Fetching records took {:.2f}s".format(time.perf_counter() - start_time))
        yield dictfetchall(db_cursor)
def load_subawards(submission_attributes, db_cursor):
    """
    Loads File F from the broker. db_cursor should be the db_cursor for Broker
    """
    # A list of award id's to update the subaward accounts and totals on
    award_ids_to_update = set()

    # Get a list of PIIDs from this submission
    awards_for_sub = Award.objects.filter(transaction__submission=submission_attributes).distinct()
    piids = list(awards_for_sub.values_list("piid", flat=True))
    fains = list(awards_for_sub.values_list("fain", flat=True))

    # This allows us to handle an empty list in the SQL without changing the query
    piids.append(None)
    fains.append(None)

    # D1 File F
    db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)])
    d1_f_data = dictfetchall(db_cursor)
    logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data)))
    d1_create_count = 0
    d1_update_count = 0
    d1_empty_count = 0

    for row in d1_f_data:
        if row['subcontract_num'] is None:
            if row['id'] is not None and row['subcontract_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d1_empty_count += 1
            continue

        # Get the agency
        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn(
                "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(
                    row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-contract to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching parent award id, piid, and submission attributes
        award = Award.objects.filter(
            awarding_agency=agency,
            transaction__submission=submission_attributes,
            transaction__contract_data__piid=row['piid'],
            transaction__contract_data__isnull=False,
            transaction__contract_data__parent_award_id=row['parent_award_id']
        ).distinct().order_by("-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn(
                "Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; "
                "skipping...".format(row['subcontract_num'], row['piid'], row['parent_award_id']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.recipient_name = row['company_name']
            recipient.location = get_or_create_location(row, location_d1_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        d1_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': None,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subcontract_num'],
            'amount': row['subcontract_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['recovery_model_q1'],
            'recovery_model_question2': row['recovery_model_q2'],
            'action_date': row['subcontract_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': row['naics'],
            'naics_description': row['naics_description'],
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(
            subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict)
        if created:
            d1_create_count += 1
        else:
            d1_update_count += 1

    # D2 File F
    db_cursor.execute(D2_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(fains)])
    d2_f_data = dictfetchall(db_cursor)
    logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data)))
    d2_create_count = 0
    d2_update_count = 0
    d2_empty_count = 0

    for row in d2_f_data:
        if row['subaward_num'] is None:
            if row['id'] is not None and row['subaward_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d2_empty_count += 1
            continue

        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn(
                "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(
                    row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-award to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching fain and submission. If this fails, try submission and uri
        award = None  # reset any award matched on a previous iteration before trying FAIN/URI
        if row['fain'] and len(row['fain']) > 0:
            award = Award.objects.filter(
                awarding_agency=agency,
                transaction__submission=submission_attributes,
                transaction__assistance_data__isnull=False,
                transaction__assistance_data__fain=row['fain']
            ).distinct().order_by("-date_signed").first()

        # Couldn't find a match on FAIN, try URI if it exists
        if not award and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(
                awarding_agency=agency,
                transaction__submission=submission_attributes,
                transaction__assistance_data__isnull=False,
                transaction__assistance_data__uri=row['uri']
            ).distinct().first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; skipping...".format(
                row['subaward_num'], row['fain'], row['uri']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            recipient_name = row['awardee_name']
            if recipient_name is None:
                recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            recipient.recipient_name = recipient_name
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d2_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        # Get CFDA Program
        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()

        d2_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['project_description'],
            'recovery_model_question1': row['compensation_q1'],
            'recovery_model_question2': row['compensation_q2'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': None,
            'naics_description': None,
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(
            subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict)
        if created:
            d2_create_count += 1
        else:
            d2_update_count += 1

    # Update Award objects with subaward aggregates
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
           Subcontracts created: {}
           Subcontracts updated: {}
           Empty subcontract rows: {}
           Subawards created: {}
           Subawards updated: {}
           Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                             d1_create_count,
                                             d1_update_count,
                                             d1_empty_count,
                                             d2_create_count,
                                             d2_update_count,
                                             d2_empty_count))
def get_file_b(submission_attributes, db_cursor):
    """
    Get broker File B data for a specific submission.
    This function was added as a workaround for the fact that a few agencies (two, as of April, 2017: DOI and ACHP)
    submit multiple File B records for the same object class. These "dupes" come in as the same 4 digit object
    class code but with one of the direct reimbursable flags set to NULL.
    From our perspective, this is a duplicate, because we get our D/R info from the 1st digit of the object class
    when it's four digits.
    Thus, this function examines the File B data for a given submission. If it has the issue of "duplicate"
    object classes, it will squash the offending records together so that all financial totals are reported as a
    single object class/program activity/TAS record as expected.
    If the broker validations change to prohibit this pattern in the data, this intervening function will no longer
    be necessary; we can go back to selecting * from the broker's File B data.
    Args:
        submission_attributes: submission object currently being loaded
        db_cursor: db connection info
    """
    submission_id = submission_attributes.broker_submission_id

    # does this file B have the dupe object class edge case?
    check_dupe_oc = (
        "SELECT count(*) "
        "FROM certified_object_class_program_activity "
        "WHERE submission_id = %s "
        "AND length(object_class) = 4 "
        "GROUP BY tas_id, program_activity_code, object_class "
        "HAVING COUNT(*) > 1"
    )
    db_cursor.execute(check_dupe_oc, [submission_id])
    dupe_oc_count = len(dictfetchall(db_cursor))

    if dupe_oc_count == 0:
        # there are no object class duplicates, so proceed as usual
        db_cursor.execute(
            "SELECT * FROM certified_object_class_program_activity WHERE submission_id = %s", [submission_id])
    else:
        # file b contains at least one case of duplicate 4 digit object classes for the same program activity/tas,
        # so combine the records in question
        combine_dupe_oc = (
            "SELECT "
            "submission_id, "
            "job_id, "
            "agency_identifier, "
            "allocation_transfer_agency, "
            "availability_type_code, "
            "beginning_period_of_availa, "
            "ending_period_of_availabil, "
            "main_account_code, "
            "RIGHT(object_class, 3) AS object_class, "
            "CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '1' THEN 'D' "
            "WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '2' THEN 'R' "
            "ELSE by_direct_reimbursable_fun END AS by_direct_reimbursable_fun, "
            "tas, "
            "tas_id, "
            "program_activity_code, "
            "program_activity_name, "
            "sub_account_code, "
            "SUM(deobligations_recov_by_pro_cpe) AS deobligations_recov_by_pro_cpe, "
            "SUM(gross_outlay_amount_by_pro_cpe) AS gross_outlay_amount_by_pro_cpe, "
            "SUM(gross_outlay_amount_by_pro_fyb) AS gross_outlay_amount_by_pro_fyb, "
            "SUM(gross_outlays_delivered_or_cpe) AS gross_outlays_delivered_or_cpe, "
            "SUM(gross_outlays_delivered_or_fyb) AS gross_outlays_delivered_or_fyb, "
            "SUM(gross_outlays_undelivered_cpe) AS gross_outlays_undelivered_cpe, "
            "SUM(gross_outlays_undelivered_fyb) AS gross_outlays_undelivered_fyb, "
            "SUM(obligations_delivered_orde_cpe) AS obligations_delivered_orde_cpe, "
            "SUM(obligations_delivered_orde_fyb) AS obligations_delivered_orde_fyb, "
            "SUM(obligations_incurred_by_pr_cpe) AS obligations_incurred_by_pr_cpe, "
            "SUM(obligations_undelivered_or_cpe) AS obligations_undelivered_or_cpe, "
            "SUM(obligations_undelivered_or_fyb) AS obligations_undelivered_or_fyb, "
            "SUM(ussgl480100_undelivered_or_cpe) AS ussgl480100_undelivered_or_cpe, "
            "SUM(ussgl480100_undelivered_or_fyb) AS ussgl480100_undelivered_or_fyb, "
            "SUM(ussgl480200_undelivered_or_cpe) AS ussgl480200_undelivered_or_cpe, "
            "SUM(ussgl480200_undelivered_or_fyb) AS ussgl480200_undelivered_or_fyb, "
            "SUM(ussgl483100_undelivered_or_cpe) AS ussgl483100_undelivered_or_cpe, "
            "SUM(ussgl483200_undelivered_or_cpe) AS ussgl483200_undelivered_or_cpe, "
            "SUM(ussgl487100_downward_adjus_cpe) AS ussgl487100_downward_adjus_cpe, "
            "SUM(ussgl487200_downward_adjus_cpe) AS ussgl487200_downward_adjus_cpe, "
            "SUM(ussgl488100_upward_adjustm_cpe) AS ussgl488100_upward_adjustm_cpe, "
            "SUM(ussgl488200_upward_adjustm_cpe) AS ussgl488200_upward_adjustm_cpe, "
            "SUM(ussgl490100_delivered_orde_cpe) AS ussgl490100_delivered_orde_cpe, "
            "SUM(ussgl490100_delivered_orde_fyb) AS ussgl490100_delivered_orde_fyb, "
            "SUM(ussgl490200_delivered_orde_cpe) AS ussgl490200_delivered_orde_cpe, "
            "SUM(ussgl490800_authority_outl_cpe) AS ussgl490800_authority_outl_cpe, "
            "SUM(ussgl490800_authority_outl_fyb) AS ussgl490800_authority_outl_fyb, "
            "SUM(ussgl493100_delivered_orde_cpe) AS ussgl493100_delivered_orde_cpe, "
            "SUM(ussgl497100_downward_adjus_cpe) AS ussgl497100_downward_adjus_cpe, "
            "SUM(ussgl497200_downward_adjus_cpe) AS ussgl497200_downward_adjus_cpe, "
            "SUM(ussgl498100_upward_adjustm_cpe) AS ussgl498100_upward_adjustm_cpe, "
            "SUM(ussgl498200_upward_adjustm_cpe) AS ussgl498200_upward_adjustm_cpe "
            "FROM certified_object_class_program_activity "
            "WHERE submission_id = %s "
            "GROUP BY "
            "submission_id, "
            "job_id, "
            "agency_identifier, "
            "allocation_transfer_agency, "
            "availability_type_code, "
            "beginning_period_of_availa, "
            "ending_period_of_availabil, "
            "main_account_code, "
            "RIGHT(object_class, 3), "
            "CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '1' THEN 'D' "
            "WHEN length(object_class) = 4 AND LEFT(object_class, 1) = '2' THEN 'R' "
            "ELSE by_direct_reimbursable_fun END, "
            "program_activity_code, "
            "program_activity_name, "
            "sub_account_code, "
            "tas, "
            "tas_id"
        )

        logger.info(
            "Found {} duplicated File B 4 digit object codes in submission {}. "
            "Aggregating financial values.".format(dupe_oc_count, submission_id))

        # we have at least one instance of duplicated 4 digit object classes so aggregate the financial values together
        db_cursor.execute(combine_dupe_oc, [submission_id])

    data = dictfetchall(db_cursor)
    return data
def handle_loading(self, db_cursor, *args, **options):

    def signal_handler(signal, frame):
        transaction.set_rollback(True)
        raise Exception('Received interrupt signal. Aborting...')

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    submission_id = options['submission_id'][0]

    logger.info('Getting submission {} from broker...'.format(submission_id))
    db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id])
    submission_data = dictfetchall(db_cursor)
    logger.info('Finished getting submission {} from broker'.format(submission_id))

    if len(submission_data) == 0:
        raise CommandError('Could not find submission with id ' + str(submission_id))
    elif len(submission_data) > 1:
        raise CommandError('Found multiple submissions with id ' + str(submission_id))

    submission_data = submission_data[0].copy()
    broker_submission_id = submission_data['submission_id']
    del submission_data['submission_id']  # We use broker_submission_id, submission_id is our own PK
    submission_attributes = get_submission_attributes(broker_submission_id, submission_data)

    logger.info('Getting File A data')
    db_cursor.execute('SELECT * FROM certified_appropriation WHERE submission_id = %s', [submission_id])
    appropriation_data = dictfetchall(db_cursor)
    logger.info('Acquired File A (appropriation) data for ' + str(submission_id) + ', there are ' +
                str(len(appropriation_data)) + ' rows.')
    logger.info('Loading File A data')
    start_time = datetime.now()
    load_file_a(submission_attributes, appropriation_data, db_cursor)
    logger.info('Finished loading File A data, took {}'.format(datetime.now() - start_time))

    logger.info('Getting File B data')
    prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor)
    logger.info('Acquired File B (program activity object class) data for ' + str(submission_id) +
                ', there are ' + str(len(prg_act_obj_cls_data)) + ' rows.')
    logger.info('Loading File B data')
    start_time = datetime.now()
    load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor)
    logger.info('Finished loading File B data, took {}'.format(datetime.now() - start_time))

    logger.info('Getting File C data')
    # we don't have sub-tier agency info, so we'll do our best
    # to match them to the more specific award records
    award_financial_query = 'SELECT * FROM certified_award_financial WHERE submission_id = {0}'.format(submission_id)
    if isinstance(db_cursor, PhonyCursor):
        # spoofed data for test
        award_financial_frame = pd.DataFrame(db_cursor.db_responses[award_financial_query])
    else:
        # real data
        award_financial_frame = pd.read_sql(award_financial_query, connections['data_broker'])
    logger.info('Acquired File C (award financial) data for {}, there are {} rows.'.format(
        submission_id, award_financial_frame.shape[0]))
    logger.info('Loading File C data')
    start_time = datetime.now()
    awards_touched = load_file_c(submission_attributes, db_cursor, award_financial_frame)
    logger.info('Finished loading File C data, took {}'.format(datetime.now() - start_time))

    if not options['nosubawards']:
        try:
            start_time = datetime.now()
            logger.info('Loading subaward data...')
            load_subawards(submission_attributes, awards_touched, db_cursor)
            logger.info('Finished loading subaward data, took {}'.format(datetime.now() - start_time))
        except Exception:
            logger.warning("Error loading subawards for this submission")
    else:
        logger.info('Skipping subawards due to flags...')

    # Once all the files have been processed, run any global cleanup/post-load tasks.
    # Cleanup not specific to this submission is run in the `.handle` method
    logger.info('Successfully loaded broker submission {}.'.format(options['submission_id'][0]))
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):

    # logger.info("Getting IDs for what's currently in the DB...")
    # current_ids = TransactionFABS.objects
    #
    # if fiscal_year:
    #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
    #
    # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

    query = "SELECT * FROM published_award_financial_assistance"
    arguments = []

    if fiscal_year:
        if arguments:
            query += " AND"
        else:
            query += " WHERE"
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin]
        arguments += [fy_end]
    query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    logger.info("Executing query on Broker DB => " + query % tuple(arguments))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    award_financial_assistance_data = dictfetchall(db_cursor)

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    logger.info("Getting total rows")
    # rows_loaded = len(current_ids)
    total_rows = len(award_financial_assistance_data)  # - rows_loaded

    logger.info("Processing " + str(total_rows) + " rows of assistance data")

    # skip_count = 0

    # ROW ITERATION STARTS HERE

    lel_bulk = []
    pop_bulk = []
    legal_entity_bulk = []
    award_bulk = []

    transaction_assistance_bulk = []
    transaction_normalized_bulk = []

    logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # Recipient flag is true for LeL
        legal_entity_location = get_or_create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
        )

        lel_bulk.append(legal_entity_location)

    logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
    try:
        Location.objects.bulk_create(lel_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # Place of Performance flag is true for PoP
        pop_location = get_or_create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
        )

        pop_bulk.append(pop_location)

    logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
    try:
        Location.objects.bulk_create(pop_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        recipient_name = row.get('awardee_or_recipient_legal', '')

        legal_entity = LegalEntity.objects.filter(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
        ).first()

        if legal_entity is None:
            legal_entity = LegalEntity(
                recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name)

        legal_entity_value_map = {
            "location": lel_bulk[index - 1],
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False)

        legal_entity_bulk.append(legal_entity)

    logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
    try:
        LegalEntity.objects.bulk_create(legal_entity_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    awarding_agency_list = []
    funding_agency_list = []

    logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
            awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
            awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
            row['awarding_agency_code'] = awarding_cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
            if funding_subtier_agency_id is not None:
                funding_toptier_agency_id = subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
            else:
                funding_cgac_code = None
            row['funding_agency_code'] = funding_cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'],
            row["funding_sub_tier_agency_co"]
        )
        awarding_agency_list.append(awarding_agency)
        funding_agency_list.append(funding_agency)

        # award.save() is called in Award.get_or_create_summary_award by default
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            fain=row.get('fain'),
            uri=row.get('uri'),
            save=False
        )

        award_bulk.append(award)
        award_update_id_list.append(award.id)

    logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
    try:
        Award.objects.bulk_create(award_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        parent_txn_value_map = {
            "award": award_bulk[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_bulk[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
    try:
        TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
    except IntegrityError:
        logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing...')

    for index, row in enumerate(award_financial_assistance_data, 1):
        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        transaction_assistance = TransactionFABS(
            transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data)
        transaction_assistance_bulk.append(transaction_assistance)

    logger.info('Bulk creating TransactionFABS rows...')
    try:
        TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')
def get_file_b(submission_attributes, db_cursor): """ Get broker File B data for a specific submission. This function was added as a workaround for the fact that a few agencies (two, as of April, 2017: DOI and ACHP) submit multiple File B records for the same object class. These "dupes", come in as the same 4 digit object class code but with one of the direct reimbursable flags set to NULL. From our perspective, this is a duplicate, because we get our D/R info from the 1st digit of the object class when it's four digits. Thus, this function examines the File B data for a given submission. If it has the issue of "duplicate" object classes, it will squash the offending records together so that all financial totals are reporting as a single object class/program activity/TAS record as expected. If the broker validations change to prohibit this pattern in the data, this intervening function will no longer be necessary, we can go back to selecting * from the broker's File B data. Args: submission_attributes: submission object currently being loaded db_cursor: db connection info """ submission_id = submission_attributes.submission_id # does this file B have the dupe object class edge case? check_dupe_oc = f""" select count(*) from certified_object_class_program_activity where submission_id = %s and length(object_class) = 4 group by tas_id, program_activity_code, object_class, disaster_emergency_fund_code having count(*) > 1 """ db_cursor.execute(check_dupe_oc, [submission_id]) dupe_oc_count = len(dictfetchall(db_cursor)) if dupe_oc_count == 0: # there are no object class duplicates, so proceed as usual db_cursor.execute( "select * from certified_object_class_program_activity where submission_id = %s", [submission_id]) else: # file b contains at least one case of duplicate 4 digit object classes for the same program activity/tas, # so combine the records in question combine_dupe_oc = f""" select submission_id, job_id, agency_identifier, allocation_transfer_agency, availability_type_code, beginning_period_of_availa, ending_period_of_availabil, main_account_code, right(object_class, 3) as object_class, case when length(object_class) = 4 and left(object_class, 1) = '1' then 'D' when length(object_class) = 4 and left(object_class, 1) = '2' then 'R' else by_direct_reimbursable_fun end as by_direct_reimbursable_fun, tas, tas_id, program_activity_code, program_activity_name, sub_account_code, sum(deobligations_recov_by_pro_cpe) as deobligations_recov_by_pro_cpe, sum(gross_outlay_amount_by_pro_cpe) as gross_outlay_amount_by_pro_cpe, sum(gross_outlay_amount_by_pro_fyb) as gross_outlay_amount_by_pro_fyb, sum(gross_outlays_delivered_or_cpe) as gross_outlays_delivered_or_cpe, sum(gross_outlays_delivered_or_fyb) as gross_outlays_delivered_or_fyb, sum(gross_outlays_undelivered_cpe) as gross_outlays_undelivered_cpe, sum(gross_outlays_undelivered_fyb) as gross_outlays_undelivered_fyb, sum(obligations_delivered_orde_cpe) as obligations_delivered_orde_cpe, sum(obligations_delivered_orde_fyb) as obligations_delivered_orde_fyb, sum(obligations_incurred_by_pr_cpe) as obligations_incurred_by_pr_cpe, sum(obligations_undelivered_or_cpe) as obligations_undelivered_or_cpe, sum(obligations_undelivered_or_fyb) as obligations_undelivered_or_fyb, sum(ussgl480100_undelivered_or_cpe) as ussgl480100_undelivered_or_cpe, sum(ussgl480100_undelivered_or_fyb) as ussgl480100_undelivered_or_fyb, sum(ussgl480200_undelivered_or_cpe) as ussgl480200_undelivered_or_cpe, sum(ussgl480200_undelivered_or_fyb) as ussgl480200_undelivered_or_fyb, 
sum(ussgl483100_undelivered_or_cpe) as ussgl483100_undelivered_or_cpe, sum(ussgl483200_undelivered_or_cpe) as ussgl483200_undelivered_or_cpe, sum(ussgl487100_downward_adjus_cpe) as ussgl487100_downward_adjus_cpe, sum(ussgl487200_downward_adjus_cpe) as ussgl487200_downward_adjus_cpe, sum(ussgl488100_upward_adjustm_cpe) as ussgl488100_upward_adjustm_cpe, sum(ussgl488200_upward_adjustm_cpe) as ussgl488200_upward_adjustm_cpe, sum(ussgl490100_delivered_orde_cpe) as ussgl490100_delivered_orde_cpe, sum(ussgl490100_delivered_orde_fyb) as ussgl490100_delivered_orde_fyb, sum(ussgl490200_delivered_orde_cpe) as ussgl490200_delivered_orde_cpe, sum(ussgl490800_authority_outl_cpe) as ussgl490800_authority_outl_cpe, sum(ussgl490800_authority_outl_fyb) as ussgl490800_authority_outl_fyb, sum(ussgl493100_delivered_orde_cpe) as ussgl493100_delivered_orde_cpe, sum(ussgl497100_downward_adjus_cpe) as ussgl497100_downward_adjus_cpe, sum(ussgl497200_downward_adjus_cpe) as ussgl497200_downward_adjus_cpe, sum(ussgl498100_upward_adjustm_cpe) as ussgl498100_upward_adjustm_cpe, sum(ussgl498200_upward_adjustm_cpe) as ussgl498200_upward_adjustm_cpe, disaster_emergency_fund_code from certified_object_class_program_activity where submission_id = %s group by submission_id, job_id, agency_identifier, allocation_transfer_agency, availability_type_code, beginning_period_of_availa, ending_period_of_availabil, main_account_code, right(object_class, 3), case when length(object_class) = 4 and left(object_class, 1) = '1' then 'D' when length(object_class) = 4 and left(object_class, 1) = '2' then 'R' else by_direct_reimbursable_fun end, program_activity_code, program_activity_name, sub_account_code, tas, tas_id, disaster_emergency_fund_code """ logger.info( f"Found {dupe_oc_count:,} duplicated File B 4 digit object codes in submission {submission_id}. " f"Aggregating financial values.") # we have at least one instance of duplicated 4 digit object classes so aggregate the financial values together db_cursor.execute(combine_dupe_oc, [submission_id]) data = dictfetchall(db_cursor) return data
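The CASE expression in this query encodes a simple convention: a four digit object class carries the direct ('1') or reimbursable ('2') flag in its leading digit, and the remaining three digits are the object class proper. A small, purely illustrative Python equivalent of that mapping:

def normalize_object_class(object_class, direct_reimbursable=None):
    """Illustrative only: split a 4 digit object class into (3 digit code, D/R flag)."""
    if len(object_class) == 4:
        leading, object_class = object_class[0], object_class[1:]
        if leading == '1':
            direct_reimbursable = 'D'  # direct
        elif leading == '2':
            direct_reimbursable = 'R'  # reimbursable
    return object_class, direct_reimbursable


assert normalize_object_class('1110') == ('110', 'D')
assert normalize_object_class('2110') == ('110', 'R')
assert normalize_object_class('110', 'D') == ('110', 'D')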
def update_location_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000, save=True):

    list_of_columns = (', '.join(['piid', 'award_modification_amendme', 'legal_entity_country_code',
                                  'place_of_perform_country_c', 'legal_entity_state_code',
                                  'place_of_performance_state']))

    query = "SELECT {} FROM detached_award_procurement".format(list_of_columns)
    arguments = []

    if fiscal_year:
        # Only build the fiscal year bounds when a fiscal year was requested; computing them
        # unconditionally would raise a TypeError when fiscal_year is None.
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        query += " AND" if arguments else " WHERE"
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin, fy_end]

    query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    logger.info("Executing query on Broker DB => " + query + " with arguments " + str(arguments))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    procurement_data = dictfetchall(db_cursor)

    logger.info("Getting total rows")
    total_rows = len(procurement_data)

    logger.info("Processing " + str(total_rows) + " rows of procurement data")

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        with db_transaction.atomic():
            if not (index % 100):
                logger.info('D1 File Fix: Fixing row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))

            transaction = TransactionNormalized.objects.filter(
                award__piid=row['piid'],
                modification_number=row['award_modification_amendme']).first()
            if not transaction:
                logger.info("Couldn't find transaction with piid ({}) and modification_number ({}). Skipping.".format(
                    row['piid'], row['award_modification_amendme']))
                continue

            if transaction.recipient and transaction.recipient.location:
                lel = transaction.recipient.location
                location_country_code = row['legal_entity_country_code']
                state_code = row['legal_entity_state_code']
                lel = update_country_code("d1", lel, location_country_code, state_code)
                lel.save()

            if transaction.place_of_performance:
                pop = transaction.place_of_performance
                location_country_code = row['place_of_perform_country_c']
                state_code = row['place_of_performance_state']
                pop = update_country_code("d1", pop, location_country_code, state_code)
                pop.save()
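The fiscal year window built here (and in the other Broker loaders) always has the same shape: FY N runs from 10/01 of year N-1 through 09/30 of year N. A hypothetical helper, shown only to make the boundaries explicit:

def fiscal_year_bounds(fiscal_year):
    """Return (begin, end) date strings for a US federal fiscal year, as used by these queries."""
    fy_begin = '10/01/' + str(fiscal_year - 1)
    fy_end = '09/30/' + str(fiscal_year)
    return fy_begin, fy_end


assert fiscal_year_bounds(2017) == ('10/01/2016', '09/30/2017')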
def update_location_transaction_assistance(db_cursor, fiscal_year=2017, page=1, limit=500000, save=True): list_of_columns = (', '.join(['fain', 'uri', 'award_modification_amendme', 'legal_entity_country_code', 'place_of_perform_country_c', 'place_of_performance_code', 'legal_entity_state_code', 'legal_entity_state_name', 'place_of_perform_state_nam'])) # get the transaction values we need # TODO: Modify cutoff date to match nightly loads query = "SELECT {} FROM published_award_financial_assistance WHERE is_active=TRUE " \ "AND updated_at < '09/20/2017'".format(list_of_columns) arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: query += " AND" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s' arguments += [limit, (page - 1) * limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") award_financial_assistance_data = dictfetchall(db_cursor) logger.info("Getting total rows") total_rows = len(award_financial_assistance_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of location data") start_time = datetime.now() trans_queryset = TransactionNormalized.objects.prefetch_related('award', 'recipient__location') for index, row in enumerate(award_financial_assistance_data, 1): if not (index % 100): logger.info('Location Fix: Fixing row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) # Could also use contract_data__fain transaction = trans_queryset.filter(award__fain=row['fain'], award__uri=row['uri'], modification_number=row['award_modification_amendme']).first() if not transaction: logger.info('Couldn\'t find transaction with fain ({}), uri({}), and modification_number({}). ' 'Skipping.'.format(row['fain'], row['uri'], row['award_modification_amendme'])) continue if transaction.recipient and transaction.recipient.location: lel = transaction.recipient.location location_country_code = row['legal_entity_country_code'] state_code = row['legal_entity_state_code'] state_name = row['legal_entity_state_name'] lel = update_country_code("d2", lel, location_country_code, state_code, state_name) lel.save() if transaction.place_of_performance: pop = transaction.place_of_performance location_country_code = row['place_of_perform_country_c'] place_of_perform_code = row['place_of_performance_code'] state_name = row['place_of_perform_state_nam'] pop = update_country_code("d2", pop, location_country_code, state_code, state_name, place_of_performance_code=place_of_perform_code) pop.save()
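These loaders slice large Broker tables with a 1-based page number and a row limit; the LIMIT/OFFSET arithmetic is easy to get wrong, so here it is in isolation (names are hypothetical):

def page_bounds(page, limit):
    """Translate a 1-based page number and page size into SQL LIMIT/OFFSET values."""
    return limit, (page - 1) * limit


assert page_bounds(1, 500000) == (500000, 0)
assert page_bounds(3, 500000) == (500000, 1000000)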
def handle_loading(self, db_cursor, *args, **options): def signal_handler(signal, frame): transaction.set_rollback(True) raise Exception('Received interrupt signal. Aborting...') signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) submission_id = options['submission_id'][0] logger.info('Getting submission {} from broker...'.format(submission_id)) db_cursor.execute('SELECT * FROM submission WHERE submission_id = %s', [submission_id]) submission_data = dictfetchall(db_cursor) logger.info('Finished getting submission {} from broker'.format(submission_id)) if len(submission_data) == 0: raise CommandError('Could not find submission with id ' + str(submission_id)) elif len(submission_data) > 1: raise CommandError('Found multiple submissions with id ' + str(submission_id)) submission_data = submission_data[0].copy() broker_submission_id = submission_data['submission_id'] del submission_data['submission_id'] # We use broker_submission_id, submission_id is our own PK submission_attributes = get_submission_attributes(broker_submission_id, submission_data) logger.info('Getting File A data') db_cursor.execute('SELECT * FROM certified_appropriation WHERE submission_id = %s', [submission_id]) appropriation_data = dictfetchall(db_cursor) logger.info('Acquired File A (appropriation) data for ' + str(submission_id) + ', there are ' + str( len(appropriation_data)) + ' rows.') logger.info('Loading File A data') start_time = datetime.now() load_file_a(submission_attributes, appropriation_data, db_cursor) logger.info('Finished loading File A data, took {}'.format(datetime.now() - start_time)) logger.info('Getting File B data') prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor) logger.info( 'Acquired File B (program activity object class) data for ' + str(submission_id) + ', there are ' + str( len(prg_act_obj_cls_data)) + ' rows.') logger.info('Loading File B data') start_time = datetime.now() load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor) logger.info('Finished loading File B data, took {}'.format(datetime.now() - start_time)) logger.info('Getting File C data') # we dont have sub-tier agency info, so we'll do our best # to match them to the more specific award records award_financial_query = 'SELECT * FROM certified_award_financial WHERE submission_id = {0}'.\ format(submission_id) if isinstance(db_cursor, PhonyCursor): # spoofed data for test award_financial_frame = pd.DataFrame(db_cursor.db_responses[award_financial_query]) else: # real data award_financial_frame = pd.read_sql(award_financial_query, connections['data_broker']) logger.info('Acquired File C (award financial) data for {}, there are {} rows.' .format(submission_id, award_financial_frame.shape[0])) logger.info('Loading File C data') start_time = datetime.now() awards_touched = load_file_c(submission_attributes, db_cursor, award_financial_frame) logger.info('Finished loading File C data, took {}'.format(datetime.now() - start_time)) if not options['nosubawards']: try: start_time = datetime.now() logger.info('Loading subaward data...') load_subawards(submission_attributes, awards_touched, db_cursor) logger.info('Finshed loading subaward data, took {}'.format(datetime.now() - start_time)) except Exception: logger.warning("Error loading subawards for this submission") else: logger.info('Skipping subawards due to flags...') # Once all the files have been processed, run any global cleanup/post-load tasks. 
    # Cleanup not specific to this submission is run in the `.handle` method
    logger.info('Successfully loaded broker submission {}.'.format(options['submission_id'][0]))
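handle_loading installs the same interrupt handling on every run: SIGINT/SIGTERM mark the enclosing Django transaction for rollback and abort. Pulled out on its own as a sketch (the function name is hypothetical):

import signal

from django.db import transaction


def install_abort_handlers():
    """Roll back the current atomic block and abort if the process is interrupted."""
    def signal_handler(signum, frame):
        transaction.set_rollback(True)
        raise Exception('Received interrupt signal. Aborting...')

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)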
def handle_loading(self, db_cursor, *args, **options): def signal_handler(signal, frame): transaction.set_rollback(True) raise Exception("Received interrupt signal. Aborting...") signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) submission_id = options["submission_id"][0] logger.info( "Getting submission {} from broker...".format(submission_id)) db_cursor.execute("SELECT * FROM submission WHERE submission_id = %s", [submission_id]) submission_data = dictfetchall(db_cursor) logger.info( "Finished getting submission {} from broker".format(submission_id)) if len(submission_data) == 0: raise CommandError("Could not find submission with id " + str(submission_id)) elif len(submission_data) > 1: raise CommandError("Found multiple submissions with id " + str(submission_id)) submission_data = submission_data[0].copy() broker_submission_id = submission_data["submission_id"] del submission_data[ "submission_id"] # We use broker_submission_id, submission_id is our own PK submission_attributes = get_submission_attributes( broker_submission_id, submission_data) logger.info("Getting File A data") db_cursor.execute( "SELECT * FROM certified_appropriation WHERE submission_id = %s", [submission_id]) appropriation_data = dictfetchall(db_cursor) logger.info("Acquired File A (appropriation) data for " + str(submission_id) + ", there are " + str(len(appropriation_data)) + " rows.") logger.info("Loading File A data") start_time = datetime.now() load_file_a(submission_attributes, appropriation_data, db_cursor) logger.info( "Finished loading File A data, took {}".format(datetime.now() - start_time)) logger.info("Getting File B data") prg_act_obj_cls_data = get_file_b(submission_attributes, db_cursor) logger.info( "Acquired File B (program activity object class) data for " + str(submission_id) + ", there are " + str(len(prg_act_obj_cls_data)) + " rows.") logger.info("Loading File B data") start_time = datetime.now() load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor) logger.info( "Finished loading File B data, took {}".format(datetime.now() - start_time)) logger.info("Getting File C data") # we dont have sub-tier agency info, so we'll do our best # to match them to the more specific award records award_financial_query = ( "SELECT * FROM certified_award_financial" f" WHERE submission_id = {submission_id}" " AND transaction_obligated_amou IS NOT NULL AND transaction_obligated_amou != 0" ) award_financial_frame = pd.read_sql(award_financial_query, connections["data_broker"]) logger.info( "Acquired File C (award financial) data for {}, there are {} rows." .format(submission_id, award_financial_frame.shape[0])) logger.info("Loading File C data") start_time = datetime.now() load_file_c(submission_attributes, db_cursor, award_financial_frame) logger.info( "Finished loading File C data, took {}".format(datetime.now() - start_time)) # Once all the files have been processed, run any global cleanup/post-load tasks. # Cleanup not specific to this submission is run in the `.handle` method logger.info("Successfully loaded broker submission {}.".format( options["submission_id"][0]))
def get_file_b(submission_attributes, db_cursor): """ Get broker File B data for a specific submission. This function was added as a workaround for the fact that a few agencies (two, as of April, 2017: DOI and ACHP) submit multiple File B records for the same object class. These "dupes", come in as the same 4 digit object class code but with one of the direct reimbursable flags set to NULL. From our perspective, this is a duplicate, because we get our D/R info from the 1st digit of the object class when it's four digits. Thus, this function examines the File B data for a given submission. If it has the issue of "duplicate" object classes, it will squash the offending records together so that all financial totals are reporting as a single object class/program activity/TAS record as expected. If the broker validations change to prohibit this pattern in the data, this intervening function will no longer be necessary, we can go back to selecting * from the broker's File B data. Args: submission_attributes: submission object currently being loaded db_cursor: db connection info """ submission_id = submission_attributes.broker_submission_id # does this file B have the dupe object class edge case? check_dupe_oc = ( 'SELECT count(*) ' 'FROM certified_object_class_program_activity ' 'WHERE submission_id = %s ' 'AND length(object_class) = 4 ' 'GROUP BY tas_id, program_activity_code, object_class ' 'HAVING COUNT(*) > 1' ) db_cursor.execute(check_dupe_oc, [submission_id]) dupe_oc_count = len(dictfetchall(db_cursor)) if dupe_oc_count == 0: # there are no object class duplicates, so proceed as usual db_cursor.execute('SELECT * FROM certified_object_class_program_activity WHERE submission_id = %s', [submission_id]) else: # file b contains at least one case of duplicate 4 digit object classes for the same program activity/tas, # so combine the records in question combine_dupe_oc = ( 'SELECT ' 'submission_id, ' 'job_id, ' 'agency_identifier, ' 'allocation_transfer_agency, ' 'availability_type_code, ' 'beginning_period_of_availa, ' 'ending_period_of_availabil, ' 'main_account_code, ' 'RIGHT(object_class, 3) AS object_class, ' 'CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'1\' THEN \'d\' ' 'WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'2\' THEN \'r\' ' 'ELSE by_direct_reimbursable_fun END AS by_direct_reimbursable_fun, ' 'tas, ' 'tas_id, ' 'program_activity_code, ' 'program_activity_name, ' 'sub_account_code, ' 'SUM(deobligations_recov_by_pro_cpe) AS deobligations_recov_by_pro_cpe, ' 'SUM(gross_outlay_amount_by_pro_cpe) AS gross_outlay_amount_by_pro_cpe, ' 'SUM(gross_outlay_amount_by_pro_fyb) AS gross_outlay_amount_by_pro_fyb, ' 'SUM(gross_outlays_delivered_or_cpe) AS gross_outlays_delivered_or_cpe, ' 'SUM(gross_outlays_delivered_or_fyb) AS gross_outlays_delivered_or_fyb, ' 'SUM(gross_outlays_undelivered_cpe) AS gross_outlays_undelivered_cpe, ' 'SUM(gross_outlays_undelivered_fyb) AS gross_outlays_undelivered_fyb, ' 'SUM(obligations_delivered_orde_cpe) AS obligations_delivered_orde_cpe, ' 'SUM(obligations_delivered_orde_fyb) AS obligations_delivered_orde_fyb, ' 'SUM(obligations_incurred_by_pr_cpe) AS obligations_incurred_by_pr_cpe, ' 'SUM(obligations_undelivered_or_cpe) AS obligations_undelivered_or_cpe, ' 'SUM(obligations_undelivered_or_fyb) AS obligations_undelivered_or_fyb, ' 'SUM(ussgl480100_undelivered_or_cpe) AS ussgl480100_undelivered_or_cpe, ' 'SUM(ussgl480100_undelivered_or_fyb) AS ussgl480100_undelivered_or_fyb, ' 'SUM(ussgl480200_undelivered_or_cpe) AS 
ussgl480200_undelivered_or_cpe, ' 'SUM(ussgl480200_undelivered_or_fyb) AS ussgl480200_undelivered_or_fyb, ' 'SUM(ussgl483100_undelivered_or_cpe) AS ussgl483100_undelivered_or_cpe, ' 'SUM(ussgl483200_undelivered_or_cpe) AS ussgl483200_undelivered_or_cpe, ' 'SUM(ussgl487100_downward_adjus_cpe) AS ussgl487100_downward_adjus_cpe, ' 'SUM(ussgl487200_downward_adjus_cpe) AS ussgl487200_downward_adjus_cpe, ' 'SUM(ussgl488100_upward_adjustm_cpe) AS ussgl488100_upward_adjustm_cpe, ' 'SUM(ussgl488200_upward_adjustm_cpe) AS ussgl488200_upward_adjustm_cpe, ' 'SUM(ussgl490100_delivered_orde_cpe) AS ussgl490100_delivered_orde_cpe, ' 'SUM(ussgl490100_delivered_orde_fyb) AS ussgl490100_delivered_orde_fyb, ' 'SUM(ussgl490200_delivered_orde_cpe) AS ussgl490200_delivered_orde_cpe, ' 'SUM(ussgl490800_authority_outl_cpe) AS ussgl490800_authority_outl_cpe, ' 'SUM(ussgl490800_authority_outl_fyb) AS ussgl490800_authority_outl_fyb, ' 'SUM(ussgl493100_delivered_orde_cpe) AS ussgl493100_delivered_orde_cpe, ' 'SUM(ussgl497100_downward_adjus_cpe) AS ussgl497100_downward_adjus_cpe, ' 'SUM(ussgl497200_downward_adjus_cpe) AS ussgl497200_downward_adjus_cpe, ' 'SUM(ussgl498100_upward_adjustm_cpe) AS ussgl498100_upward_adjustm_cpe, ' 'SUM(ussgl498200_upward_adjustm_cpe) AS ussgl498200_upward_adjustm_cpe ' 'FROM certified_object_class_program_activity ' 'WHERE submission_id = %s ' 'GROUP BY ' 'submission_id, ' 'job_id, ' 'agency_identifier, ' 'allocation_transfer_agency, ' 'availability_type_code, ' 'beginning_period_of_availa, ' 'ending_period_of_availabil, ' 'main_account_code, ' 'RIGHT(object_class, 3), ' 'CASE WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'1\' THEN \'d\' ' 'WHEN length(object_class) = 4 AND LEFT(object_class, 1) = \'2\' THEN \'r\' ' 'ELSE by_direct_reimbursable_fun END, ' 'program_activity_code, ' 'program_activity_name, ' 'sub_account_code, ' 'tas, ' 'tas_id' ) logger.info( 'Found {} duplicated File B 4 digit object codes in submission {}. ' 'Aggregating financial values.'.format(dupe_oc_count, submission_id)) # we have at least one instance of duplicated 4 digit object classes so aggregate the financial values together db_cursor.execute(combine_dupe_oc, [submission_id]) data = dictfetchall(db_cursor) return data
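The duplicate check above asks the Broker to count object class groups that occur more than once. The same idea in plain Python, purely as an illustration of what dupe_oc_count represents:

from collections import Counter


def count_duplicate_object_class_groups(rows):
    """Count (tas_id, program_activity_code, object_class) groups appearing more than once."""
    groups = Counter(
        (row['tas_id'], row['program_activity_code'], row['object_class'])
        for row in rows
        if len(row['object_class']) == 4
    )
    return sum(1 for occurrences in groups.values() if occurrences > 1)


sample = [
    {'tas_id': 1, 'program_activity_code': '0001', 'object_class': '1110'},
    {'tas_id': 1, 'program_activity_code': '0001', 'object_class': '1110'},
    {'tas_id': 1, 'program_activity_code': '0001', 'object_class': '254'},
]
assert count_duplicate_object_class_groups(sample) == 1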
def gather_next_subawards(db_cursor, award_type, subaward_type, max_id, offset): """ Get next batch of subawards of the relevant type starting at a given offset """ query_columns = [ 'award.internal_id', 'award.id', 'award.report_period_mon', 'award.report_period_year', 'sub_award.duns AS duns', 'sub_award.parent_duns AS parent_duns', 'sub_award.dba_name AS dba_name', 'sub_award.principle_place_country AS principle_place_country', 'sub_award.principle_place_city AS principle_place_city', 'sub_award.principle_place_zip AS principle_place_zip', 'sub_award.principle_place_state AS principle_place_state', 'sub_award.principle_place_state_name AS principle_place_state_name', 'sub_award.principle_place_street AS principle_place_street', 'sub_award.principle_place_district AS principle_place_district', 'sub_award.top_paid_fullname_1', 'sub_award.top_paid_amount_1', 'sub_award.top_paid_fullname_2', 'sub_award.top_paid_amount_2', 'sub_award.top_paid_fullname_3', 'sub_award.top_paid_amount_3', 'sub_award.top_paid_fullname_4', 'sub_award.top_paid_amount_4', 'sub_award.top_paid_fullname_5', 'sub_award.top_paid_amount_5', ] # We need different columns depending on if it's a procurement or a grant. Setting some columns to have labels # so we can easily access them without making two different dictionaries. if award_type == 'procurement': query_columns.extend([ 'award.contract_number AS piid', 'sub_award.naics AS naics_code', 'sub_award.subcontract_num AS subaward_num', 'sub_award.subcontract_amount AS subaward_amount', 'sub_award.overall_description AS description', 'sub_award.recovery_model_q1 AS q1_flag', 'sub_award.recovery_model_q2 AS q2_flag', 'sub_award.subcontract_date AS action_date', 'sub_award.company_name AS recipient_name', 'sub_award.company_address_country AS recipient_location_country_code', 'sub_award.company_address_city AS recipient_location_city_name', 'sub_award.company_address_zip AS recipient_location_zip4', 'LEFT(sub_award.company_address_zip, 5) AS recipient_location_zip5', 'sub_award.company_address_state AS recipient_location_state_code', 'sub_award.company_address_state_name AS recipient_location_state_name', 'sub_award.company_address_street AS recipient_location_street_address', 'sub_award.company_address_district AS recipient_location_congressional_code', 'sub_award.parent_company_name AS parent_recipient_name', 'sub_award.bus_types AS bus_types', ]) _select = "SELECT {}" _from = "FROM fsrs_{} AS award JOIN fsrs_{} AS sub_award ON sub_award.parent_id = award.id" _where = "WHERE award.id > {} AND sub_award.subcontract_num IS NOT NULL" _other = "ORDER BY award.id, sub_award.id LIMIT {} OFFSET {}" query = " ".join([_select, _from, _where, _other]).format(",".join(query_columns), award_type, subaward_type, str(max_id), str(QUERY_LIMIT), str(offset)) else: # grant query_columns.extend([ 'sub_award.cfda_numbers', 'sub_award.subaward_num', 'sub_award.subaward_amount', 'sub_award.project_description AS description', 'sub_award.compensation_q1 AS q1_flag', 'sub_award.compensation_q2 AS q2_flag', 'sub_award.subaward_date AS action_date', 'sub_award.awardee_name AS recipient_name', 'sub_award.awardee_address_country AS recipient_location_country_code', 'sub_award.awardee_address_city AS recipient_location_city_name', 'sub_award.awardee_address_zip AS recipient_location_zip4', 'LEFT(sub_award.awardee_address_zip, 5) AS recipient_location_zip5', 'sub_award.awardee_address_state AS recipient_location_state_code', 'sub_award.awardee_address_state_name AS recipient_location_state_name', 
'sub_award.awardee_address_street AS recipient_location_street_address', 'sub_award.awardee_address_district AS recipient_location_congressional_code', 'UPPER(award.fain) AS fain', ]) _select = "SELECT {}" _from = "FROM fsrs_{} AS award JOIN fsrs_{} AS sub_award ON sub_award.parent_id = award.id" _where = "WHERE award.id > {} AND sub_award.subaward_num IS NOT NULL" _other = "ORDER BY award.id, sub_award.id LIMIT {} OFFSET {}" query = " ".join([_select, _from, _where, _other]).format(",".join(query_columns), award_type, subaward_type, str(max_id), str(QUERY_LIMIT), str(offset)) db_cursor.execute(query) return dictfetchall(db_cursor)
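gather_next_subawards reads one LIMIT/OFFSET window per call, so a caller is expected to keep advancing the offset until a window comes back empty. A hypothetical driver loop (assuming the same QUERY_LIMIT constant used above):

def iter_subawards(db_cursor, award_type, subaward_type, max_id, query_limit=QUERY_LIMIT):
    """Yield subaward rows window by window until the Broker returns an empty batch."""
    offset = 0
    while True:
        batch = gather_next_subawards(db_cursor, award_type, subaward_type, max_id, offset)
        if not batch:
            break
        for row in batch:
            yield row
        offset += query_limit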
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000): # logger.info("Getting IDs for what's currently in the DB...") # current_ids = TransactionFPDS.objects # # if fiscal_year: # current_ids = current_ids.filter(action_date__fy=fiscal_year) # # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True) query = "SELECT * FROM detached_award_procurement" arguments = [] fy_begin = '10/01/' + str(fiscal_year - 1) fy_end = '09/30/' + str(fiscal_year) if fiscal_year: if arguments: query += " AND" else: query += " WHERE" query += ' action_date::Date BETWEEN %s AND %s' arguments += [fy_begin] arguments += [fy_end] query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s' arguments += [limit, (page-1)*limit] logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3])) db_cursor.execute(query, arguments) logger.info("Running dictfetchall on db_cursor") procurement_data = dictfetchall(db_cursor) legal_entity_location_field_map = { "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "location_country_code": "legal_entity_country_code", "city_name": "legal_entity_city_name", "congressional_code": "legal_entity_congressional", "state_code": "legal_entity_state_code", "zip4": "legal_entity_zip4" } legal_entity_location_value_map = { "recipient_flag": True } place_of_performance_field_map = { # not sure place_of_performance_locat maps exactly to city name # "city_name": "place_of_performance_locat", # location id doesn't mean it's a city. Can't use this mapping "congressional_code": "place_of_performance_congr", "state_code": "place_of_performance_state", "zip4": "place_of_performance_zip4a", "location_country_code": "place_of_perform_country_c" } place_of_performance_value_map = { "place_of_performance_flag": True } contract_field_map = { "type": "contract_award_type", "description": "award_description" } logger.info("Getting total rows") # rows_loaded = len(current_ids) total_rows = len(procurement_data) # - rows_loaded logger.info("Processing " + str(total_rows) + " rows of procurement data") # skip_count = 0 start_time = datetime.now() for index, row in enumerate(procurement_data, 1): with db_transaction.atomic(): # if TransactionFPDS.objects.values('detached_award_procurement_id').\ # filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first(): # skip_count += 1 # # if not (skip_count % 100): # logger.info('Skipped {} records so far'.format(str(skip_count))) if not (index % 100): logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" legal_entity_location, created = get_or_create_location( legal_entity_location_field_map, row, copy(legal_entity_location_value_map) ) # Create the legal entity if it doesn't exist legal_entity, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name ) if created: legal_entity_value_map = { "location": legal_entity_location, } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location, created = get_or_create_location( place_of_performance_field_map, row, copy(place_of_performance_value_map)) # If awarding 
toptier agency code (aka CGAC) is not supplied on the D2 record, # use the sub tier code to look it up. This code assumes that all incoming # records will supply an awarding subtier agency code if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1: awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]] awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id'] awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id] row['awarding_agency_code'] = awarding_cgac_code # If funding toptier agency code (aka CGAC) is empty, try using the sub # tier funding code to look it up. Unlike the awarding agency, we can't # assume that the funding agency subtier code will always be present. if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1: funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"]) if funding_subtier_agency_id is not None: funding_toptier_agency_id = \ subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id'] funding_cgac_code = toptier_agency_map[funding_toptier_agency_id] else: funding_cgac_code = None row['funding_agency_code'] = funding_cgac_code # Find the award that this award transaction belongs to. If it doesn't exist, create it. awarding_agency = Agency.get_by_toptier_subtier( row['awarding_agency_code'], row["awarding_sub_tier_agency_c"] ) created, award = Award.get_or_create_summary_award( awarding_agency=awarding_agency, piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_piid=row.get('parent_award_id')) award.save() award_update_id_list.append(award.id) award_contract_update_id_list.append(award.id) parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'], row["funding_sub_tier_agency_co"]), "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), } transaction_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict) transaction.save() contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, as_dict=True) transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance) # catch exception and do nothing if we see # "django.db.utils.IntegrityError: duplicate key value violates unique constraint" try: transaction_contract.save() except IntegrityError: pass
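Both the FPDS and FABS loaders fall back to the subtier agency code when a record arrives without a toptier (CGAC) code. The lookup chain through the three preloaded maps, shown as a hypothetical helper:

def resolve_cgac(agency_code, sub_tier_code, subtier_agency_map, subtier_to_agency_map, toptier_agency_map):
    """Return the supplied CGAC code, or derive it from the subtier code; None if neither resolves."""
    if agency_code and agency_code.strip():
        return agency_code
    subtier_agency_id = subtier_agency_map.get(sub_tier_code)
    if subtier_agency_id is None:
        return None
    toptier_agency_id = subtier_to_agency_map[subtier_agency_id]['toptier_agency_id']
    return toptier_agency_map[toptier_agency_id]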
def get_fpds_data(date):
    if not hasattr(date, 'month'):
        date = datetime.strptime(date, '%Y-%m-%d').date()

    db_cursor = connections['data_broker'].cursor()

    # The ORDER BY is important here because deletions must happen in a specific order and that order is defined
    # by the Broker's PK since every modification is a new row
    db_query = 'SELECT * ' \
               'FROM detached_award_procurement ' \
               'WHERE updated_at >= %s ' \
               'ORDER BY detached_award_procurement_id'
    db_args = [date]

    db_cursor.execute(db_query, db_args)
    db_rows = dictfetchall(db_cursor)  # a list of dicts, one per Broker row

    ids_to_delete = []

    if settings.IS_LOCAL:
        for file in os.listdir(settings.CSV_LOCAL_PATH):
            if re.search('.*_delete_records_(IDV|award).*', file) and \
                    datetime.strptime(file[:file.find('_')], '%m-%d-%Y').date() >= date:
                with open(settings.CSV_LOCAL_PATH + file, 'r') as current_file:
                    # open file, split string to array, skip the header
                    reader = csv.reader(current_file.read().splitlines())
                    next(reader)
                    unique_key_list = [rows[0] for rows in reader]

                    ids_to_delete += unique_key_list
    else:
        # Connect to AWS; both environment variables are required
        aws_region = os.environ.get('AWS_REGION')
        fpds_bucket_name = os.environ.get('FPDS_BUCKET_NAME')

        if not (aws_region and fpds_bucket_name):
            raise Exception('Missing required environment variables: AWS_REGION, FPDS_BUCKET_NAME')

        s3client = boto3.client('s3', region_name=aws_region)
        s3resource = boto3.resource('s3', region_name=aws_region)
        s3_bucket = s3resource.Bucket(fpds_bucket_name)

        # make an array of all the keys in the bucket
        file_list = [item.key for item in s3_bucket.objects.all()]

        # Only use files that match the date we're currently checking
        for item in file_list:
            # if the date on the file is the same day as we're checking
            if re.search('.*_delete_records_(IDV|award).*', item) and '/' not in item and \
                    datetime.strptime(item[:item.find('_')], '%m-%d-%Y').date() >= date:
                # make the url params to pass
                url_params = {'Bucket': fpds_bucket_name, 'Key': item}

                # get the url for the current file
                file_path = s3client.generate_presigned_url('get_object', Params=url_params)
                current_file = urllib.request.urlopen(file_path)
                reader = csv.reader(current_file.read().decode("utf-8").splitlines())

                # skip the header, the reader doesn't ignore it for some reason
                next(reader)
                # make an array of all the detached_award_procurement_ids
                unique_key_list = [rows[0] for rows in reader]

                ids_to_delete += unique_key_list

    logger.info('Number of records to insert/update: %s' % str(len(db_rows)))
    logger.info('Number of records to delete: %s' % str(len(ids_to_delete)))

    return db_rows, ids_to_delete
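The delete-record files above are selected purely by filename: the MM-DD-YYYY prefix must be on or after the cutoff date and the name must contain `_delete_records_` for IDV or award files. That filter in isolation (function name hypothetical):

import re
from datetime import datetime


def is_applicable_delete_file(file_name, cutoff_date):
    """True when a delete-records CSV filename applies to the date being processed."""
    if not re.search(r'.*_delete_records_(IDV|award).*', file_name):
        return False
    file_date = datetime.strptime(file_name[:file_name.find('_')], '%m-%d-%Y').date()
    return file_date >= cutoff_date


assert is_applicable_delete_file('01-15-2018_delete_records_award.csv', datetime(2018, 1, 1).date())
assert not is_applicable_delete_file('12-15-2017_delete_records_IDV.csv', datetime(2018, 1, 1).date())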
def load_subawards(submission_attributes, awards_touched, db_cursor): """ Loads File F from the broker. db_cursor should be the db_cursor for Broker """ # A list of award id's to update the subaward accounts and totals on award_ids_to_update = set() # Get a list of PIIDs from this submission # TODO: URIS awards_touched = [Award.objects.filter(id=award_id).first() for award_id in awards_touched] piids = list([award.piid for award in awards_touched if award.piid]) fains = list([award.fain for award in awards_touched if award.fain]) uris = list([award.uri for award in awards_touched if award.uri]) # This allows us to handle an empty list in the SQL without changing the query piids.append(None) fains.append(None) # D1 File F db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)]) d1_f_data = dictfetchall(db_cursor) logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data))) d1_create_count = 0 d1_update_count = 0 d1_empty_count = 0 for row in d1_f_data: if row['subcontract_num'] is None: if row['id'] is not None and row['subcontract_amount'] is not None: logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"])) logger.warn("Failing row: {}".format(row)) else: d1_empty_count += 1 continue # Get the agency agency = get_valid_awarding_agency(row) if not agency: logger.warn( "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format( row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])) continue # Find the award to attach this sub-contract to # We perform this lookup by finding the Award containing a transaction with a matching parent award id, piid, # and submission attributes award = Award.objects.filter( awarding_agency=agency, latest_transaction__contract_data__piid=row['piid'], latest_transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by( "-date_signed").first() # We don't have a matching award for this subcontract, log a warning and continue to the next row if not award: logger.warn( "Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format( row['subcontract_num'], row['piid'], row['parent_award_id'])) continue award_ids_to_update.add(award.id) # Get or create unique DUNS-recipient pair recipient, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['duns'], recipient_name=row['company_name'] ) if created: recipient.parent_recipient_unique_id = row['parent_duns'] recipient.location = get_or_create_location(row, location_d1_recipient_mapper) recipient.save() # Get or create POP place_of_performance = get_or_create_location(row, pop_mapper) d1_f_dict = { 'award': award, 'recipient': recipient, 'data_source': "DBR", 'cfda': None, 'awarding_agency': award.awarding_agency, 'funding_agency': award.funding_agency, 'place_of_performance': place_of_performance, 'subaward_number': row['subcontract_num'], 'amount': row['subcontract_amount'], 'description': row['overall_description'], 'recovery_model_question1': row['recovery_model_q1'], 'recovery_model_question2': row['recovery_model_q2'], 'action_date': row['subcontract_date'], 'award_report_fy_month': row['report_period_mon'], 'award_report_fy_year': row['report_period_year'] } # Create the subaward subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict) if created: d1_create_count += 1 else: d1_update_count += 1 # D2 File F 
db_cursor.execute(D2_FILE_F_QUERY, [tuple(fains), tuple(uris)]) d2_f_data = dictfetchall(db_cursor) logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data))) d2_create_count = 0 d2_update_count = 0 d2_empty_count = 0 for row in d2_f_data: if row['subaward_num'] is None: if row['id'] is not None and row['subaward_amount'] is not None: logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"])) logger.warn("Failing row: {}".format(row)) else: d2_empty_count += 1 continue agency = get_valid_awarding_agency(row) if not agency: logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier " "code {}".format(row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])) continue # Find the award to attach this sub-award to # We perform this lookup by finding the Award containing a transaction with a matching fain and submission. # If this fails, try submission and uri if row['fain'] and len(row['fain']) > 0: award = Award.objects.filter(awarding_agency=agency, latest_transaction__assistance_data__fain=row['fain']).\ distinct().order_by("-date_signed").first() # Couldn't find a match on FAIN, try URI if it exists if not award and row['uri'] and len(row['uri']) > 0: award = Award.objects.filter(awarding_agency=agency, latest_transaction__assistance_data__uri=row['uri']).distinct().first() # Try both if not award and row['fain'] and len(row['fain']) > 0 and row['uri'] and len(row['uri']) > 0: award = Award.objects.filter(awarding_agency=agency, latest_transaction__assistance_data__fain=row['fain'], latest_transaction__assistance_data__uri=row['uri']).\ distinct().order_by("-date_signed").first() # We don't have a matching award for this subcontract, log a warning and continue to the next row if not award: logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; " "skipping...".format(row['subaward_num'], row['fain'], row['uri'])) continue award_ids_to_update.add(award.id) recipient_name = row['awardee_name'] if recipient_name is None: recipient_name = row['awardee_or_recipient_legal'] if recipient_name is None: recipient_name = "" # Get or create unique DUNS-recipient pair recipient, created = LegalEntity.objects.get_or_create( recipient_unique_id=row['duns'], recipient_name=recipient_name ) if created: recipient.parent_recipient_unique_id = row['parent_duns'] recipient.location = get_or_create_location(row, location_d2_recipient_mapper) recipient.save() # Get or create POP place_of_performance = get_or_create_location(row, pop_mapper) # Get CFDA Program cfda = Cfda.objects.filter(program_number=row['cfda_number']).first() d2_f_dict = { 'award': award, 'recipient': recipient, 'data_source': "DBR", 'cfda': cfda, 'awarding_agency': award.awarding_agency, 'funding_agency': award.funding_agency, 'place_of_performance': place_of_performance, 'subaward_number': row['subaward_num'], 'amount': row['subaward_amount'], 'description': row['project_description'], 'recovery_model_question1': row['compensation_q1'], 'recovery_model_question2': row['compensation_q2'], 'action_date': row['subaward_date'], 'award_report_fy_month': row['report_period_mon'], 'award_report_fy_year': row['report_period_year'] } # Create the subaward subaward, created = Subaward.objects.update_or_create( subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict ) if created: d2_create_count += 1 else: d2_update_count += 1 # Update Award objects with subaward aggregates 
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
        Subcontracts created: {}
        Subcontracts updated: {}
        Empty subcontract rows: {}
        Subawards created: {}
        Subawards updated: {}
        Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                          d1_create_count, d1_update_count, d1_empty_count,
                                          d2_create_count, d2_update_count, d2_empty_count))
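The D2 matching above tries FAIN first, then URI, then both together, always preferring the most recently signed award. As a self-contained sketch against the same Award model (helper name hypothetical):

def find_d2_award(agency, fain, uri):
    """Locate the award a File F grant subaward belongs to, mirroring the fallback order above."""
    award = None
    if fain:
        award = (Award.objects
                 .filter(awarding_agency=agency, latest_transaction__assistance_data__fain=fain)
                 .distinct().order_by('-date_signed').first())
    if not award and uri:
        award = (Award.objects
                 .filter(awarding_agency=agency, latest_transaction__assistance_data__uri=uri)
                 .distinct().first())
    if not award and fain and uri:
        award = (Award.objects
                 .filter(awarding_agency=agency,
                         latest_transaction__assistance_data__fain=fain,
                         latest_transaction__assistance_data__uri=uri)
                 .distinct().order_by('-date_signed').first())
    return award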