Пример #1
0
def test_get_by_toptier_subtier():
    """Agency.get_by_toptier_subtier should match on toptier and subtier code."""
    top = mommy.make("references.ToptierAgency", toptier_code="xyz", name="yo")
    sub = mommy.make("references.SubtierAgency", subtier_code="abc", name="hi")

    # A sibling agency under the same toptier but with a different subtier.
    other_sub = mommy.make("references.SubtierAgency", subtier_code="bbb")
    mommy.make("references.Agency", toptier_agency=top, subtier_agency=other_sub)

    # Created last; this is the agency the lookup is expected to hand back.
    expected = mommy.make(
        "references.Agency", toptier_agency=top, subtier_agency=sub
    )

    lookup = Agency.get_by_toptier_subtier

    # The lookup should return the agency with the most recent updated_date
    # that matches both the toptier and the subtier code.
    assert lookup("xyz", "abc") == expected

    # Code pairs without a matching agency should yield None.
    unmatched_pairs = (("nope", "nada"), ("xyz", "nada"), ("nope", "bbb"))
    for toptier_code, subtier_code in unmatched_pairs:
        assert lookup(toptier_code, subtier_code) is None
Пример #2
0
def test_get_by_toptier_subtier():
    """Agency.get_by_toptier_subtier should return the latest matching agency."""
    top = mommy.make('references.ToptierAgency', cgac_code='xyz', name='yo')
    sub = mommy.make('references.SubtierAgency', subtier_code='abc', name='hi')

    # An earlier agency that shares the toptier/subtier pair of the expected one.
    mommy.make('references.Agency', toptier_agency=top, subtier_agency=sub)

    # An agency under the same toptier but with a different subtier ('bbb').
    other_sub = mommy.make('references.SubtierAgency', subtier_code='bbb')
    mommy.make('references.Agency', toptier_agency=top, subtier_agency=other_sub)

    # Created last, so it is the most recent agency for ('xyz', 'abc').
    newest = mommy.make(
        'references.Agency', toptier_agency=top, subtier_agency=sub)

    lookup = Agency.get_by_toptier_subtier

    # The lookup should return the agency with the most recent updated_date
    # that matches both the toptier and the subtier code.
    assert lookup('xyz', 'abc') == newest

    # Code pairs without a matching agency should yield None.
    unmatched_pairs = (('nope', 'nada'), ('xyz', 'nada'), ('nope', 'bbb'))
    for toptier_code, subtier_code in unmatched_pairs:
        assert lookup(toptier_code, subtier_code) is None
def get_valid_awarding_agency(row):
    """
    Resolve the awarding Agency for a row from its agency codes.

    Lookups are attempted from most to least specific: toptier + subtier
    code together, then the subtier code alone, then the toptier code alone.
    Returns None when both codes are empty, otherwise whatever the last
    attempted lookup returned.
    """
    subtier_code = row['awarding_sub_tier_agency_c']
    toptier_code = row['awarding_agency_code']
    has_subtier = (subtier_code and len(subtier_code) > 0)
    has_toptier = (toptier_code and len(toptier_code) > 0)

    # Nothing to look up with.
    if not (has_toptier or has_subtier):
        return None

    match = None

    # Most specific lookup first: both codes together.
    if has_subtier and has_toptier:
        match = Agency.get_by_toptier_subtier(toptier_code, subtier_code)

    # Fall back to the subtier code alone ...
    if not match and has_subtier:
        match = Agency.get_by_subtier(subtier_code)

    # ... and finally to the toptier code alone.
    if not match and has_toptier:
        match = Agency.get_by_toptier(toptier_code)

    return match
Пример #4
0
def get_valid_awarding_agency(row):
    """
    Resolve the awarding Agency for a row from its agency codes.

    Tries toptier + subtier code together, then the subtier code alone, then
    the toptier code alone, stopping at the first lookup that yields a truthy
    result. Returns None when both codes are empty.
    """
    subtier_code = row['awarding_sub_tier_agency_c']
    toptier_code = row['awarding_agency_code']
    has_subtier = (subtier_code and len(subtier_code) > 0)
    has_toptier = (toptier_code and len(toptier_code) > 0)

    if not (has_toptier or has_subtier):
        return None

    # (applicable, lookup) pairs ordered from most to least specific. The
    # lookups are wrapped in lambdas so only the needed ones are executed.
    attempts = (
        (has_subtier and has_toptier,
         lambda: Agency.get_by_toptier_subtier(toptier_code, subtier_code)),
        (has_subtier,
         lambda: Agency.get_by_subtier(subtier_code)),
        (has_toptier,
         lambda: Agency.get_by_toptier(toptier_code)),
    )

    agency = None
    for applicable, lookup in attempts:
        if not agency and applicable:
            agency = lookup()

    return agency
Пример #5
0
def load_file_d1(submission_attributes, procurement_data, db_cursor):
    """
    Process and load file D1 broker data (contract award txns).

    For every row this gets or creates the recipient location, the legal
    entity, the place of performance location, the summary award, the
    transaction and finally the contract record.

    Args:
        submission_attributes: submission the rows belong to; stored on each
            transaction/contract record and read for its reporting period.
        procurement_data: iterable of dict-like D1 rows. Rows are mutated in
            place: a missing toptier agency code is filled in from the
            subtier code when it can be resolved.
        db_cursor: not used by this function.
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}

    legal_entity_location_value_map = {"recipient_flag": True}

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    for row in procurement_data:
        legal_entity_location, _ = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        # Create the legal entity if it doesn't exist
        try:
            legal_entity = LegalEntity.objects.get(
                recipient_unique_id=row['awardee_or_recipient_uniqu'])
        except ObjectDoesNotExist:
            legal_entity_value_map = {
                "location": legal_entity_location,
                "legal_entity_id": row['awardee_or_recipient_uniqu'],
            }
            legal_entity = load_data_into_model(
                LegalEntity(),
                row,
                value_map=legal_entity_value_map,
                save=True)

        # Create the place of performance location
        pop_location, _ = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up
        if row['awarding_agency_code'] is None:
            # No guard here on purpose: if the awarding subtier lookup comes
            # back empty, the row fails loudly instead of being loaded
            # without an awarding agency.
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        if row['funding_agency_code'] is None:
            # Unlike the awarding agency, the funding subtier code cannot be
            # assumed to resolve; leave the toptier code empty in that case
            # rather than failing on the attribute access.
            funding_subtier_match = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            if funding_subtier_match is not None:
                row['funding_agency_code'] = \
                    funding_subtier_match.toptier_agency.cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        _, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        award.save()

        # Record the award id in both module-level update lists.
        AWARD_UPDATE_ID_LIST.append(award.id)
        AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id)

        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'], row["funding_sub_tier_agency_co"])

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        # Build the transaction's field values as a dict so they can be fed
        # to get_or_create below.
        transaction_fields = load_data_into_model(
            Transaction(),
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_instance, _ = Transaction.objects.get_or_create(
            **transaction_fields)

        contract_value_map = {
            'transaction': transaction_instance,
            'submission': submission_attributes,
            'reporting_period_start':
                submission_attributes.reporting_period_start,
            'reporting_period_end':
                submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date":
                format_date(row['period_of_perf_potential_e'])
        }

        # Called with save=True; the returned instance is not needed here.
        load_data_into_model(TransactionContract(),
                             row,
                             field_map=contract_field_map,
                             value_map=contract_value_map,
                             save=True)
    def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):
        """
        Load one page of published_award_financial_assistance rows.

        The rows are fetched through ``db_cursor`` and turned, in separate
        passes, into Location, LegalEntity, Award, TransactionNormalized and
        TransactionFABS objects, each of which is written with bulk_create.
        An IntegrityError raised by a bulk_create is logged and ignored.

        Relies on names defined outside this block: subtier_agency_map,
        subtier_to_agency_map, toptier_agency_map and award_update_id_list.

        Args:
            db_cursor: cursor the query is executed on.
            fiscal_year: optional fiscal year; when truthy, only rows with an
                action_date between 10/01 of the previous year and 09/30 of
                this year are selected. When falsy, no date filter is applied.
            page: 1-based page number used to compute the OFFSET.
            limit: page size used for LIMIT.
        """
        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        # The fiscal-year bounds are only computed when a year was supplied;
        # with the default fiscal_year=None there is nothing to subtract from.
        if fiscal_year:
            fy_begin = '10/01/' + str(fiscal_year - 1)
            fy_end = '09/30/' + str(fiscal_year)
            query += ' WHERE action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin, fy_end]
        query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
        arguments += [limit, (page - 1) * limit]

        # Interpolate however many arguments were collected (2 or 4) for the log only;
        # the actual execution below passes them as bound parameters.
        logger.info("Executing query on Broker DB => " + query % tuple(arguments))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "county_code": "legal_entity_county_code",
            "county_name": "legal_entity_county_name",
            "foreign_city_name": "legal_entity_foreign_city",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "zip5": "legal_entity_zip5",
            "zip_last4": "legal_entity_zip_last4",
            "location_country_code": "legal_entity_country_code"
        }

        place_of_performance_field_map = {
            "city_name": "place_of_performance_city",
            "performance_code": "place_of_performance_code",
            "congressional_code": "place_of_performance_congr",
            "county_name": "place_of_perform_county_na",
            "foreign_location_description": "place_of_performance_forei",
            "state_name": "place_of_perform_state_nam",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        logger.info("Getting total rows")
        total_rows = len(award_financial_assistance_data)

        logger.info("Processing " + str(total_rows) + " rows of assistance data")

        # ROW ITERATION STARTS HERE
        # Every list below ends up with exactly one entry per fetched row, in
        # row order, which is what allows the later passes to zip them together.
        lel_bulk = []
        pop_bulk = []
        legal_entity_bulk = []
        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
        for row in award_financial_assistance_data:
            # Recipient flag is true for LeL
            legal_entity_location = get_or_create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
            )
            lel_bulk.append(legal_entity_location)

        logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
        try:
            Location.objects.bulk_create(lel_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
        for row in award_financial_assistance_data:
            # Place of Performance flag is true for PoP
            pop_location = get_or_create_location(
                place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
            )
            pop_bulk.append(pop_location)

        logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
        try:
            Location.objects.bulk_create(pop_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
        for row, legal_entity_location in zip(award_financial_assistance_data, lel_bulk):
            # A missing (None) recipient name is normalised to an empty
            # string, the same way the contract loader treats it.
            recipient_name = row.get('awardee_or_recipient_legal') or ''

            legal_entity = LegalEntity.objects.filter(recipient_unique_id=row['awardee_or_recipient_uniqu'],
                                                      recipient_name=recipient_name).first()

            if legal_entity is None:
                legal_entity = LegalEntity(recipient_unique_id=row['awardee_or_recipient_uniqu'],
                                           recipient_name=recipient_name)
                legal_entity_value_map = {
                    "location": legal_entity_location,
                }
                legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False)

            # NOTE(review): entities found in the DB are appended to the same
            # list that is passed to bulk_create below - confirm this is intended.
            legal_entity_bulk.append(legal_entity)

        logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
        try:
            LegalEntity.objects.bulk_create(legal_entity_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        awarding_agency_list = []
        funding_agency_list = []

        logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
        for row in award_financial_assistance_data:
            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = \
                        subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'],
                row["awarding_sub_tier_agency_c"]
            )
            funding_agency = Agency.get_by_toptier_subtier(
                row['funding_agency_code'],
                row["funding_sub_tier_agency_co"]
            )

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            # award.save() is called in Award.get_or_create_summary_award by default;
            # save=False is passed so the awards can be bulk-created below instead.
            _, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                fain=row.get('fain'),
                uri=row.get('uri'),
                save=False
            )

            award_bulk.append(award)
            award_update_id_list.append(award.id)

        logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
        per_row_objects = zip(award_financial_assistance_data, award_bulk, awarding_agency_list,
                              funding_agency_list, legal_entity_bulk, pop_bulk)
        for row, award, awarding_agency, funding_agency, legal_entity, pop_location in per_row_objects:

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
            transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
        except IntegrityError:
            logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing... ')

        for row, transaction_normalized in zip(award_financial_assistance_data, transaction_normalized_bulk):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            transaction_assistance = TransactionFABS(transaction=transaction_normalized,
                                                     **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info('Bulk creating TransactionFABS rows...')
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')
    def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000):
        """
        Load one page of detached_award_procurement rows.

        The rows are fetched through ``db_cursor``. Each row is processed
        inside its own ``db_transaction.atomic()`` block, which gets or
        creates the recipient location, the legal entity, the place of
        performance location and the summary award, then saves a
        TransactionNormalized and a TransactionFPDS record.

        Relies on names defined outside this block: subtier_agency_map,
        subtier_to_agency_map, toptier_agency_map, award_update_id_list and
        award_contract_update_id_list.

        Args:
            db_cursor: cursor the query is executed on.
            fiscal_year: optional fiscal year; when truthy, only rows with an
                action_date between 10/01 of the previous year and 09/30 of
                this year are selected. When falsy, no date filter is applied.
            page: 1-based page number used to compute the OFFSET.
            limit: page size used for LIMIT.
        """
        query = "SELECT * FROM detached_award_procurement"
        arguments = []

        # The fiscal-year bounds are only computed when a year was supplied;
        # with the default fiscal_year=None there is nothing to subtract from.
        if fiscal_year:
            fy_begin = '10/01/' + str(fiscal_year - 1)
            fy_end = '09/30/' + str(fiscal_year)
            query += ' WHERE action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin, fy_end]
        query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
        arguments += [limit, (page - 1) * limit]

        # Interpolate however many arguments were collected (2 or 4) for the log only;
        # the actual execution below passes them as bound parameters.
        logger.info("Executing query on Broker DB => " + query % tuple(arguments))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        procurement_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "location_country_code": "legal_entity_country_code",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "state_code": "legal_entity_state_code",
            "zip4": "legal_entity_zip4"
        }

        legal_entity_location_value_map = {
            "recipient_flag": True
        }

        place_of_performance_field_map = {
            # place_of_performance_locat is deliberately not mapped to city_name:
            # a location id doesn't mean it's a city, so that mapping can't be used.
            "congressional_code": "place_of_performance_congr",
            "state_code": "place_of_performance_state",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        place_of_performance_value_map = {
            "place_of_performance_flag": True
        }

        contract_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        logger.info("Getting total rows")
        total_rows = len(procurement_data)

        logger.info("Processing " + str(total_rows) + " rows of procurement data")

        start_time = datetime.now()
        for index, row in enumerate(procurement_data, 1):
            with db_transaction.atomic():
                # Progress report every 100 rows, with the elapsed time so far.
                if not (index % 100):
                    logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index),
                                                                                 str(total_rows),
                                                                                 datetime.now() - start_time))

                # A missing recipient name is stored as an empty string.
                recipient_name = row['awardee_or_recipient_legal']
                if recipient_name is None:
                    recipient_name = ""

                # The value maps are copied so the shared dicts defined above
                # are never handed to get_or_create_location directly.
                legal_entity_location, _ = get_or_create_location(
                    legal_entity_location_field_map, row, copy(legal_entity_location_value_map)
                )

                # Create the legal entity if it doesn't exist
                legal_entity, created = LegalEntity.objects.get_or_create(
                    recipient_unique_id=row['awardee_or_recipient_uniqu'],
                    recipient_name=recipient_name
                )

                # Only a freshly created entity is populated from the row.
                if created:
                    legal_entity_value_map = {
                        "location": legal_entity_location,
                    }
                    legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

                # Create the place of performance location
                pop_location, _ = get_or_create_location(
                    place_of_performance_field_map, row, copy(place_of_performance_value_map))

                # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
                # use the sub tier code to look it up. This code assumes that all incoming
                # records will supply an awarding subtier agency code
                if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                    awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                    row['awarding_agency_code'] = awarding_cgac_code

                # If funding toptier agency code (aka CGAC) is empty, try using the sub
                # tier funding code to look it up. Unlike the awarding agency, we can't
                # assume that the funding agency subtier code will always be present.
                if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = \
                            subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                        funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                    else:
                        funding_cgac_code = None
                    row['funding_agency_code'] = funding_cgac_code

                # Find the award that this award transaction belongs to. If it doesn't exist, create it.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row['awarding_agency_code'],
                    row["awarding_sub_tier_agency_c"]
                )
                _, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    piid=row.get('piid'),
                    fain=row.get('fain'),
                    uri=row.get('uri'),
                    parent_award_piid=row.get('parent_award_id'))
                award.save()

                award_update_id_list.append(award.id)
                award_contract_update_id_list.append(award.id)

                funding_agency = Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                               row["funding_sub_tier_agency_co"])

                parent_txn_value_map = {
                    "award": award,
                    "awarding_agency": awarding_agency,
                    "funding_agency": funding_agency,
                    "recipient": legal_entity,
                    "place_of_performance": pop_location,
                    "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                    "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                    "action_date": format_date(row['action_date']),
                }

                transaction_dict = load_data_into_model(
                    TransactionNormalized(),  # thrown away
                    row,
                    field_map=contract_field_map,
                    value_map=parent_txn_value_map,
                    as_dict=True)

                transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
                transaction.save()

                contract_instance = load_data_into_model(
                    TransactionFPDS(),  # thrown away
                    row,
                    as_dict=True)

                transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
                # catch exception and do nothing if we see
                # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
                # NOTE(review): the error is swallowed inside the atomic block;
                # confirm whether the row's earlier writes are meant to
                # persist when this fires.
                try:
                    transaction_contract.save()
                except IntegrityError:
                    pass
    def update_transaction_assistance(db_cursor,
                                      fiscal_year=None,
                                      page=1,
                                      limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFABS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += ' action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin]
            arguments += [fy_end]
        query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
        arguments += [limit, (page - 1) * limit]

        logger.info("Executing query on Broker DB => " + query %
                    (arguments[0], arguments[1], arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "county_code": "legal_entity_county_code",
            "county_name": "legal_entity_county_name",
            "foreign_city_name": "legal_entity_foreign_city",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "zip5": "legal_entity_zip5",
            "zip_last4": "legal_entity_zip_last4",
            "location_country_code": "legal_entity_country_code"
        }

        place_of_performance_field_map = {
            "city_name": "place_of_performance_city",
            "performance_code": "place_of_performance_code",
            "congressional_code": "place_of_performance_congr",
            "county_name": "place_of_perform_county_na",
            "foreign_location_description": "place_of_performance_forei",
            "state_name": "place_of_perform_state_nam",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        logger.info("Getting total rows")
        # rows_loaded = len(current_ids)
        total_rows = len(award_financial_assistance_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) +
                    " rows of assistance data")

        # skip_count = 0

        # ROW ITERATION STARTS HERE

        lel_bulk = []
        pop_bulk = []
        legal_entity_bulk = []
        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        logger.info(
            'Getting legal entity location objects for {} rows...'.format(
                len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Recipient flag is true for LeL
            legal_entity_location = get_or_create_location(
                legal_entity_location_field_map,
                row, {"recipient_flag": True},
                save=False)

            lel_bulk.append(legal_entity_location)

        logger.info('Bulk creating {} legal entity location rows...'.format(
            len(lel_bulk)))
        try:
            Location.objects.bulk_create(lel_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info(
            'Getting place of performance objects for {} rows...'.format(
                len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Place of Performance flag is true for PoP
            pop_location = get_or_create_location(
                place_of_performance_field_map,
                row, {"place_of_performance_flag": True},
                save=False)

            pop_bulk.append(pop_location)

        logger.info('Bulk creating {} place of performance rows...'.format(
            len(pop_bulk)))
        try:
            Location.objects.bulk_create(pop_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting legal entity objects for {} rows...'.format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            recipient_name = row.get('awardee_or_recipient_legal', '')

            legal_entity = LegalEntity.objects.filter(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name).first()

            if legal_entity is None:
                legal_entity = LegalEntity(
                    recipient_unique_id=row['awardee_or_recipient_uniqu'],
                    recipient_name=recipient_name)
                legal_entity_value_map = {
                    "location": lel_bulk[index - 1],
                }
                legal_entity = load_data_into_model(
                    legal_entity,
                    row,
                    value_map=legal_entity_value_map,
                    save=False)

            legal_entity_bulk.append(legal_entity)

        logger.info('Bulk creating {} legal entity rows...'.format(
            len(legal_entity_bulk)))
        try:
            LegalEntity.objects.bulk_create(legal_entity_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        awarding_agency_list = []
        funding_agency_list = []

        logger.info('Getting award objects for {} rows...'.format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):
            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(
                    row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[
                    awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[
                    awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(
                    row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(
                    row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = \
                        subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[
                        funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_toptier_subtier(
                row['funding_agency_code'], row["funding_sub_tier_agency_co"])

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            # award.save() is called in Award.get_or_create_summary_award by default
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                fain=row.get('fain'),
                uri=row.get('uri'),
                save=False)

            award_bulk.append(award)
            award_update_id_list.append(award.id)

        logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting transaction_normalized for {} rows...'.format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            parent_txn_value_map = {
                "award":
                award_bulk[index - 1],
                "awarding_agency":
                awarding_agency_list[index - 1],
                "funding_agency":
                funding_agency_list[index - 1],
                "recipient":
                legal_entity_bulk[index - 1],
                "place_of_performance":
                pop_bulk[index - 1],
                "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
                "action_date":
                format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction_normalized = TransactionNormalized.get_or_create_transaction(
                **transaction_dict)
            transaction_normalized.fiscal_year = fy(
                transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info('Bulk creating {} TransactionNormalized rows...'.format(
            len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(
                transaction_normalized_bulk)
        except IntegrityError:
            logger.info(
                'Tried and failed to insert duplicate transaction_normalized row. Continuing... '
            )

        for index, row in enumerate(award_financial_assistance_data, 1):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            transaction_assistance = TransactionFABS(
                transaction=transaction_normalized_bulk[index - 1],
                **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info('Bulk creating TransactionFABS rows...')
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')
    def update_transaction_contract(db_cursor,
                                    fiscal_year=None,
                                    page=1,
                                    limit=500000):
        """
        Load one page of ``detached_award_procurement`` rows into the
        award / transaction models.

        The rows are selected through ``db_cursor`` (ordered by
        ``detached_award_procurement_id``) and then processed one at a time,
        each inside its own ``db_transaction.atomic()`` block. For every row
        the recipient location, legal entity, place of performance, award,
        ``TransactionNormalized`` and ``TransactionFPDS`` records are looked
        up or created. The id of every touched award is appended to the
        module-level ``award_update_id_list`` and
        ``award_contract_update_id_list``.

        :param db_cursor: cursor the SELECT is executed on (the log message
            refers to it as the Broker DB).
        :param fiscal_year: when truthy, only rows whose ``action_date`` falls
            between 10/01 of the previous year and 09/30 of this year are
            selected; when ``None`` (the default) no date filter is applied.
        :param page: 1-based page number; the query OFFSET is
            ``(page - 1) * limit``.
        :param limit: maximum number of rows fetched for this page.
        """
        query = "SELECT * FROM detached_award_procurement"
        arguments = []

        # The fiscal-year bounds are only computed when a fiscal year was
        # actually supplied; doing the arithmetic unconditionally raised a
        # TypeError for the default fiscal_year=None.
        if fiscal_year:
            fy_begin = '10/01/' + str(fiscal_year - 1)
            fy_end = '09/30/' + str(fiscal_year)
            query += ' WHERE action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin, fy_end]
        query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
        arguments += [limit, (page - 1) * limit]

        # Interpolate for logging only; the real execution below passes the
        # arguments separately. tuple(arguments) works for both the filtered
        # (4 placeholders) and unfiltered (2 placeholders) query.
        logger.info("Executing query on Broker DB => " +
                    query % tuple(arguments))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        procurement_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "location_country_code": "legal_entity_country_code",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "state_code": "legal_entity_state_code",
            "zip4": "legal_entity_zip4"
        }

        legal_entity_location_value_map = {"recipient_flag": True}

        place_of_performance_field_map = {
            # place_of_performance_locat is deliberately not mapped to
            # city_name: a location id doesn't mean it's a city.
            "congressional_code": "place_of_performance_congr",
            "state_code": "place_of_performance_state",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        place_of_performance_value_map = {"place_of_performance_flag": True}

        contract_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        logger.info("Getting total rows")
        total_rows = len(procurement_data)

        logger.info("Processing " + str(total_rows) +
                    " rows of procurement data")

        start_time = datetime.now()
        for index, row in enumerate(procurement_data, 1):
            with db_transaction.atomic():
                # Progress message every 100 rows.
                if not (index % 100):
                    logger.info(
                        'D1 File Load: Loading row {} of {} ({})'.format(
                            str(index), str(total_rows),
                            datetime.now() - start_time))

                recipient_name = row['awardee_or_recipient_legal']
                if recipient_name is None:
                    recipient_name = ""

                # A copy of the value map is handed over on every call so the
                # shared template dict is never modified between rows.
                legal_entity_location, created = get_or_create_location(
                    legal_entity_location_field_map, row,
                    copy(legal_entity_location_value_map))

                # Create the legal entity if it doesn't exist
                legal_entity, created = LegalEntity.objects.get_or_create(
                    recipient_unique_id=row['awardee_or_recipient_uniqu'],
                    recipient_name=recipient_name)

                if created:
                    legal_entity_value_map = {
                        "location": legal_entity_location,
                    }
                    legal_entity = load_data_into_model(
                        legal_entity,
                        row,
                        value_map=legal_entity_value_map,
                        save=True)

                # Create the place of performance location
                pop_location, created = get_or_create_location(
                    place_of_performance_field_map, row,
                    copy(place_of_performance_value_map))

                # If awarding toptier agency code (aka CGAC) is not supplied
                # on the record, use the sub tier code to look it up. This
                # code assumes that all incoming records will supply an
                # awarding subtier agency code
                if row['awarding_agency_code'] is None or len(
                        row['awarding_agency_code'].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[
                        row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[
                        awarding_subtier_agency_id]['toptier_agency_id']
                    awarding_cgac_code = toptier_agency_map[
                        awarding_toptier_agency_id]
                    row['awarding_agency_code'] = awarding_cgac_code

                # If funding toptier agency code (aka CGAC) is empty, try
                # using the sub tier funding code to look it up. Unlike the
                # awarding agency, we can't assume that the funding agency
                # subtier code will always be present.
                if row['funding_agency_code'] is None or len(
                        row['funding_agency_code'].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(
                        row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = \
                            subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                        funding_cgac_code = toptier_agency_map[
                            funding_toptier_agency_id]
                    else:
                        funding_cgac_code = None
                    row['funding_agency_code'] = funding_cgac_code

                # Find the award that this award transaction belongs to. If
                # it doesn't exist, create it.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row['awarding_agency_code'],
                    row["awarding_sub_tier_agency_c"])
                created, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    piid=row.get('piid'),
                    fain=row.get('fain'),
                    uri=row.get('uri'),
                    parent_award_piid=row.get('parent_award_id'))
                award.save()

                award_update_id_list.append(award.id)
                award_contract_update_id_list.append(award.id)

                parent_txn_value_map = {
                    "award":
                    award,
                    "awarding_agency":
                    awarding_agency,
                    "funding_agency":
                    Agency.get_by_toptier_subtier(
                        row['funding_agency_code'],
                        row["funding_sub_tier_agency_co"]),
                    "recipient":
                    legal_entity,
                    "place_of_performance":
                    pop_location,
                    "period_of_performance_start_date":
                    format_date(row['period_of_performance_star']),
                    "period_of_performance_current_end_date":
                    format_date(row['period_of_performance_curr']),
                    "action_date":
                    format_date(row['action_date']),
                }

                # The model instance passed in is only a template; with
                # as_dict=True the field values come back as a dict.
                transaction_dict = load_data_into_model(
                    TransactionNormalized(),  # thrown away
                    row,
                    field_map=contract_field_map,
                    value_map=parent_txn_value_map,
                    as_dict=True)

                transaction = TransactionNormalized.get_or_create_transaction(
                    **transaction_dict)
                transaction.save()

                contract_instance = load_data_into_model(
                    TransactionFPDS(),  # thrown away
                    row,
                    as_dict=True)

                transaction_contract = TransactionFPDS(transaction=transaction,
                                                       **contract_instance)
                # catch exception and do nothing if we see
                # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
                # NOTE(review): this handler sits inside the atomic() block;
                # Django's docs advise against catching database errors
                # inside atomic() - confirm whether the row's earlier writes
                # are meant to be kept or rolled back in that case.
                try:
                    transaction_contract.save()
                except IntegrityError:
                    pass
def load_file_d2(
        submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing
):
    """
    Load file D2 broker data (financial assistance award transactions).

    With a truthy ``quick`` the work is handed to the ``load_file_d2.sql``
    script (after ``setup_broker_fdw()``) and the function returns without
    reading the rows. Otherwise every row is passed through
    ``row_preprocessor`` and used to look up or create its locations, legal
    entity, award, normalized transaction and FABS transaction. Each award id
    is appended to the module-level ``award_update_id_list``.
    """
    load_began = time.time()

    if quick:
        setup_broker_fdw()
        run_sql_file(
            'usaspending_api/etl/management/load_file_d2.sql',
            {'broker_submission_id': submission_attributes.broker_submission_id})
        logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - load_began))
        return

    # Model field name -> broker column name, for the recipient's location.
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code",
    }
    legal_entity_location_value_map = {"recipient_flag": True}

    # Model field name -> broker column name, for the place of performance.
    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c",
    }
    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    row_count = len(award_financial_assistance_data)
    loop_began = datetime.now()

    for position, raw_row in enumerate(award_financial_assistance_data, 1):
        # Report progress on every 100th row.
        if position % 100 == 0:
            elapsed = datetime.now() - loop_began
            logger.info('D2 File Load: Loading row {} of {} ({})'.format(position, row_count, elapsed))

        row = row_preprocessor(raw_row)

        recipient_location, _location_created = get_or_create_location(
            legal_entity_location_field_map, row, legal_entity_location_value_map)

        # A missing recipient name is stored as an empty string.
        recipient_name = row['awardee_or_recipient_legal']
        recipient_name = "" if recipient_name is None else recipient_name

        # Fetch the legal entity, creating and populating it when it is new.
        recipient, recipient_is_new = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)
        if recipient_is_new:
            recipient = load_data_into_model(
                recipient, row, value_map={"location": recipient_location}, save=True)

        performance_location, _location_created = get_or_create_location(
            place_of_performance_field_map, row, place_of_performance_value_map)

        # A blank awarding toptier code (CGAC) is filled in from the subtier
        # code; every incoming record is assumed to carry an awarding subtier
        # agency code.
        awarding_code = row['awarding_agency_code']
        if awarding_code is None or not awarding_code.strip():
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code

        # The funding subtier code may be absent, so the lookup is allowed to
        # come back empty and leave the funding toptier code as None.
        funding_code = row['funding_agency_code']
        if funding_code is None or not funding_code.strip():
            subtier_match = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            if subtier_match is None:
                row['funding_agency_code'] = None
            else:
                row['funding_agency_code'] = subtier_match.toptier_agency.cgac_code

        # Locate (or create) the summary award this transaction belongs to.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        _award_created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'))
        award.save()
        award_update_id_list.append(award.id)

        funding_agency_match = Agency.get_by_toptier_subtier(
            row['funding_agency_code'], row["funding_sub_tier_agency_co"])
        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency_match,
            "recipient": recipient,
            "place_of_performance": performance_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        # The fresh model instance is only a template; as_dict=True returns
        # the field values as a dict instead.
        transaction = TransactionNormalized.get_or_create_transaction(
            **load_data_into_model(
                TransactionNormalized(),
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True))
        transaction.save()

        cfda_match = Cfda.objects.filter(program_number=row['cfda_number']).first()
        fad_value_map = {
            "submission": submission_attributes,
            "cfda": cfda_match,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
        }

        fabs_fields = load_data_into_model(
            TransactionFABS(),
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            as_dict=True)
        TransactionFABS.get_or_create_2(transaction=transaction, **fabs_fields).save()

    logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - load_began))
Пример #11
0
def load_file_d2(submission_attributes,
                 award_financial_assistance_data,
                 db_cursor,
                 quick,
                 row_preprocessor=no_preprocessing):
    """
    Process file D2 broker rows (financial assistance award txns) and store
    them.

    If ``quick`` is truthy, ``setup_broker_fdw()`` is called and the
    ``load_file_d2.sql`` script does the load; the supplied rows are not
    iterated. Otherwise each row goes through ``row_preprocessor`` and is
    then turned into location, legal entity, award, TransactionNormalized
    and TransactionFABS records, with the award id appended to the
    module-level ``award_update_id_list``.
    """

    def code_is_blank(code):
        # True for None and for strings that are empty once stripped.
        return code is None or len(code.strip()) < 1

    began_at = time.time()

    if quick:
        setup_broker_fdw()

        sql_parameters = {
            'broker_submission_id': submission_attributes.broker_submission_id
        }
        run_sql_file('usaspending_api/etl/management/load_file_d2.sql',
                     sql_parameters)
        seconds_taken = time.time() - began_at
        logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(seconds_taken))
        return

    # Recipient location: model field name -> broker column name.
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    # Place of performance: model field name -> broker column name.
    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    legal_entity_location_value_map = {"recipient_flag": True}
    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    total = len(award_financial_assistance_data)
    loop_start = datetime.now()

    for row_number, source_row in enumerate(award_financial_assistance_data,
                                            1):
        # Progress message once per 100 rows.
        if row_number % 100 == 0:
            logger.info('D2 File Load: Loading row {} of {} ({})'.format(
                str(row_number), str(total),
                datetime.now() - loop_start))

        row = row_preprocessor(source_row)

        lel, _ignored = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        # Recipient names of None are replaced by the empty string.
        name = row['awardee_or_recipient_legal']
        if name is None:
            name = ""

        # Look the legal entity up; populate it from the row only when it
        # has just been created.
        entity, entity_created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=name)
        if entity_created:
            entity = load_data_into_model(
                entity, row, value_map={"location": lel}, save=True)

        pop, _ignored = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # When the awarding toptier code (CGAC) is blank, derive it from the
        # subtier code, which all incoming records are assumed to supply.
        if code_is_blank(row['awarding_agency_code']):
            awarding_by_subtier = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"])
            row['awarding_agency_code'] = \
                awarding_by_subtier.toptier_agency.cgac_code

        # The funding subtier code is optional, so a failed lookup simply
        # leaves the funding toptier code as None.
        if code_is_blank(row['funding_agency_code']):
            funding_by_subtier = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            derived_code = None
            if funding_by_subtier is not None:
                derived_code = funding_by_subtier.toptier_agency.cgac_code
            row['funding_agency_code'] = derived_code

        # Resolve the award this transaction hangs off, creating it if
        # necessary.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'))[1]
        award.save()

        award_update_id_list.append(award.id)

        funding_agency_obj = Agency.get_by_toptier_subtier(
            row['funding_agency_code'], row["funding_sub_tier_agency_co"])
        pop_start = format_date(row['period_of_performance_star'])
        pop_end = format_date(row['period_of_performance_curr'])
        action_date = format_date(row['action_date'])
        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency_obj,
            "recipient": entity,
            "place_of_performance": pop,
            'submission': submission_attributes,
            "period_of_performance_start_date": pop_start,
            "period_of_performance_current_end_date": pop_end,
            "action_date": action_date,
        }

        # The new model instance is only a template; as_dict=True makes the
        # helper hand back the field values as a dict.
        txn_fields = load_data_into_model(
            TransactionNormalized(),
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(
            **txn_fields)
        transaction.save()

        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()
        period_start = submission_attributes.reporting_period_start
        period_end = submission_attributes.reporting_period_end
        fad_value_map = {
            "submission": submission_attributes,
            "cfda": cfda,
            'reporting_period_start': period_start,
            'reporting_period_end': period_end,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
        }

        assistance_fields = load_data_into_model(
            TransactionFABS(),
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            as_dict=True)

        fabs_record = TransactionFABS.get_or_create_2(
            transaction=transaction, **assistance_fields)
        fabs_record.save()

    total_seconds = time.time() - began_at
    logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(total_seconds))
Пример #12
0
def load_file_d1(submission_attributes,
                 procurement_data,
                 db_cursor,
                 quick=False):
    """
    Load broker file D1 rows (contract award transactions) for one submission.

    When ``quick`` is true the work is handed to the ``load_file_d1.sql``
    script, parameterised with the submission's broker id, and the function
    returns straight away. Otherwise every row of ``procurement_data`` is
    processed in turn: recipient location, legal entity, place of
    performance, summary award, TransactionNormalized and TransactionFPDS.

    Rows are modified in place: a blank awarding/funding toptier code is
    replaced with the code looked up through the subtier agency.
    ``db_cursor`` is accepted but not used in this function.
    """
    timer_start = time.time()

    def is_blank(code):
        # A code counts as missing when it is None or only whitespace.
        return code is None or len(code.strip()) < 1

    def log_elapsed():
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(
            time.time() - timer_start))

    if quick:
        run_sql_file(
            'usaspending_api/etl/management/load_file_d1.sql',
            {'broker_submission_id':
             submission_attributes.broker_submission_id})
        log_elapsed()
        return

    # model field -> broker column, for the recipient's address
    recipient_location_fields = dict(
        address_line1="legal_entity_address_line1",
        address_line2="legal_entity_address_line2",
        address_line3="legal_entity_address_line3",
        location_country_code="legal_entity_country_code",
        city_name="legal_entity_city_name",
        congressional_code="legal_entity_congressional",
        state_code="legal_entity_state_code",
        zip4="legal_entity_zip4",
    )
    recipient_location_defaults = dict(recipient_flag=True)

    # model field -> broker column, for the place of performance
    # (not sure place_of_performance_locat maps exactly to city name)
    pop_location_fields = dict(
        city_name="place_of_performance_locat",
        congressional_code="place_of_performance_congr",
        state_code="place_of_performance_state",
        zip4="place_of_performance_zip4a",
        location_country_code="place_of_perform_country_c",
    )
    pop_location_defaults = dict(place_of_performance_flag=True)

    # model field -> broker column, shared by both transaction models
    award_text_fields = dict(
        type="contract_award_type",
        description="award_description",
    )

    row_total = len(procurement_data)
    loop_start = datetime.now()

    for row_number, row in enumerate(procurement_data, start=1):
        if row_number % 100 == 0:
            progress = 'D1 File Load: Loading row {} of {} ({})'.format(
                str(row_number), str(row_total),
                datetime.now() - loop_start)
            logger.info(progress)

        # Recipient address
        recipient_location, _ = get_or_create_location(
            recipient_location_fields, row,
            copy(recipient_location_defaults))

        # Recipient (legal entity); a missing name is stored as "".
        raw_name = row['awardee_or_recipient_legal']
        legal_entity, is_new_entity = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name="" if raw_name is None else raw_name)
        if is_new_entity:
            # Only freshly created entities get populated from the row.
            legal_entity = load_data_into_model(
                legal_entity,
                row,
                value_map={"location": recipient_location},
                save=True)

        # Place of performance
        pop_location, _ = get_or_create_location(
            pop_location_fields, row, copy(pop_location_defaults))

        # Awarding toptier code (CGAC) missing: derive it from the subtier
        # code, which every incoming record is assumed to carry.
        if is_blank(row['awarding_agency_code']):
            awarding_lookup = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"])
            row['awarding_agency_code'] = (
                awarding_lookup.toptier_agency.cgac_code)

        # Funding toptier code missing: same idea, but the funding subtier
        # code is optional, so the lookup may come back empty.
        if is_blank(row['funding_agency_code']):
            funding_lookup = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            if funding_lookup is None:
                row['funding_agency_code'] = None
            else:
                row['funding_agency_code'] = (
                    funding_lookup.toptier_agency.cgac_code)

        # Summary award this transaction belongs to.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        _, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            # a FAIN/PIID/URI, not our db's pk
            parent_award_piid=row.get('parent_award_id'))
        award.save()

        for id_list in (award_update_id_list,
                        award_contract_update_id_list):
            id_list.append(award.id)

        # Normalized transaction
        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'], row["funding_sub_tier_agency_co"])
        normalized_values = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }
        normalized_kwargs = load_data_into_model(
            TransactionNormalized(),  # instance is discarded, dict is kept
            row,
            field_map=award_text_fields,
            value_map=normalized_values,
            as_dict=True)
        transaction = TransactionNormalized.get_or_create_transaction(
            **normalized_kwargs)
        transaction.save()

        # FPDS (contract) detail row attached to the normalized transaction
        fpds_values = {
            'submission': submission_attributes,
            'reporting_period_start':
                submission_attributes.reporting_period_start,
            'reporting_period_end':
                submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date":
                format_date(row['period_of_perf_potential_e']),
        }
        fpds_kwargs = load_data_into_model(
            TransactionFPDS(),  # instance is discarded, dict is kept
            row,
            field_map=award_text_fields,
            value_map=fpds_values,
            as_dict=True)
        TransactionFPDS(transaction=transaction, **fpds_kwargs).save()

    log_elapsed()
    def update_awarding_funding_agency(fiscal_year=None, file_type=None, page=1, limit=500000):
        """
        Set the awarding and funding agency on TransactionNormalized rows and on their Awards,
        using the agency codes stored on the matching TransactionFPDS (D1) or TransactionFABS (D2) rows.

        Args:
            fiscal_year: value matched against ``transaction__fiscal_year`` when selecting source rows.
            file_type: ``'D1'`` to read TransactionFPDS, ``'D2'`` to read TransactionFABS.
            page: 1-based page number of the slice of source rows to process.
            limit: number of source rows per page.

        Raises:
            ValueError: if ``file_type`` is neither ``'D1'`` nor ``'D2'``.

        Rows whose transaction, award, or both agencies cannot be found are logged and skipped.
        Save failures are logged and do not stop the loop.
        """
        if file_type == 'D1':
            source_model = TransactionFPDS
        elif file_type == 'D2':
            source_model = TransactionFABS
        else:
            # Previously this fell through and failed later with an UnboundLocalError.
            raise ValueError("file_type must be 'D1' or 'D2', got {!r}".format(file_type))

        range_low = (page - 1) * limit
        range_high = range_low + limit

        # Source rows for the requested fiscal year. The explicit ordering keeps the
        # page slices stable from one call to the next.
        source_rows = (
            source_model.objects
            .filter(transaction__fiscal_year=fiscal_year)
            .order_by('transaction_id')
            .values('transaction_id',
                    'awarding_agency_code',
                    'funding_agency_code',
                    'awarding_sub_tier_agency_c',
                    'funding_sub_tier_agency_co')[range_low:range_high]
        )

        # One dict per source row: transaction id plus CGAC and subtier codes.
        transaction_cgac_subtier_map = [
            {
                'transaction_id': source_row['transaction_id'],
                'awarding_cgac_code': source_row['awarding_agency_code'],
                'funding_cgac_code': source_row['funding_agency_code'],
                'awarding_subtier_code': source_row['awarding_sub_tier_agency_c'],
                'funding_subtier_code': source_row['funding_sub_tier_agency_co'],
            }
            for source_row in source_rows
        ]

        total_rows = len(transaction_cgac_subtier_map)

        logger.info("Processing " + str(total_rows) + " rows of transaction data")
        logger.info("Rows range from {} to {}".format(range_low, range_high))

        # Go through each D1 or D2 transaction and set its awarding/funding agency
        start_time = datetime.now()
        for index, row in enumerate(transaction_cgac_subtier_map, 1):

            if not (index % 100):
                logger.info('Updating agencies: Loading row {} of {} ({})'.format(str(index),
                                                                                  str(total_rows),
                                                                                  datetime.now() - start_time))

            # Find corresponding transaction
            transaction = TransactionNormalized.objects.filter(id=row['transaction_id']).first()

            # Skips transaction if unable to find it in Transaction Normalized
            if transaction is None:
                logger.error('Unable to find Transaction {}'.format(str(row['transaction_id'])))
                continue

            # Resolve each agency from the lookup tables: first by (CGAC, subtier),
            # then by CGAC alone. NOTE(review): agency_no_sub_map and agency_cgac_only_map
            # are not defined in this block; presumably module-level dicts built elsewhere.
            awarding_agency = agency_no_sub_map.get((
                row['awarding_cgac_code'],
                row["awarding_subtier_code"]
            ))
            if awarding_agency is None:
                awarding_agency = agency_cgac_only_map.get(row['awarding_cgac_code'])

            funding_agency = agency_no_sub_map.get((
                row['funding_cgac_code'],
                row["funding_subtier_code"]
            ))
            if funding_agency is None:
                funding_agency = agency_cgac_only_map.get(row['funding_cgac_code'])

            # If neither agency can be resolved, move on to the next transaction
            if awarding_agency is None and funding_agency is None:
                logger.error('Unable to find awarding agency CGAC {} Subtier {} and funding agency CGAC {} Subtier {}'
                             .format(
                                row['awarding_cgac_code'],
                                row['awarding_subtier_code'],
                                row['funding_cgac_code'],
                                row['funding_subtier_code'])
                             )
                continue

            # A missing funding agency on its own is tolerated silently.
            if awarding_agency is None:
                logger.error('Unable to find awarding agency for CGAC {} Subtier {}'.format(
                    row['awarding_cgac_code'],
                    row['awarding_subtier_code']
                ))

            transaction.awarding_agency = awarding_agency
            transaction.funding_agency = funding_agency

            award = Award.objects.filter(id=transaction.award.id).first()

            if award is None:
                logger.error('Unable to find Award {}'.format(str(transaction.award.id)))
                continue

            award.awarding_agency = awarding_agency
            award.funding_agency = funding_agency

            # Best effort: a failed save is logged and the loop carries on.
            try:
                transaction.save()
                award.save()

            except Exception as e:
                logger.error('Unable to save Transaction {} and Award {}:{}'.format(str(transaction.id),
                                                                                    str(award.id),
                                                                                    str(e)))
Пример #14
0
    def update_transaction_assistance(db_cursor,
                                      fiscal_year=None,
                                      page=1,
                                      limit=500000):
        """
        Read one page of ``published_award_financial_assistance`` rows through ``db_cursor``
        and bulk-create the matching Award, TransactionNormalized and TransactionFABS rows.

        Args:
            db_cursor: cursor the query is executed on (the log message calls it the Broker DB).
            fiscal_year: when truthy, only rows with an action_date between 10/01 of the
                previous year and 09/30 of this year are read; when omitted, no date
                filter is applied.
            page: 1-based page number, used to compute the query OFFSET.
            limit: page size, used as the query LIMIT.

        An IntegrityError raised by any of the three bulk inserts is logged and ignored.
        """
        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        if fiscal_year:
            # Fiscal year N is taken to span 10/01/(N-1) through 09/30/N. The bounds are
            # only built when a year is given, so the default None no longer raises.
            query += " WHERE action_date::Date BETWEEN %s AND %s"
            arguments += ["10/01/" + str(fiscal_year - 1), "09/30/" + str(fiscal_year)]
        query += " ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s"
        arguments += [limit, (page - 1) * limit]

        # Interpolate for logging only; works with or without the date filter.
        logger.info("Executing query on Broker DB => " + query % tuple(arguments))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        # model field -> broker column, for TransactionNormalized
        fabs_normalized_field_map = {
            "type": "assistance_type",
            "description": "award_description",
            "funding_amount": "total_funding_amount",
        }

        # model field -> broker column, for TransactionFABS
        fabs_field_map = {
            "officer_1_name": "high_comp_officer1_full_na",
            "officer_1_amount": "high_comp_officer1_amount",
            "officer_2_name": "high_comp_officer2_full_na",
            "officer_2_amount": "high_comp_officer2_amount",
            "officer_3_name": "high_comp_officer3_full_na",
            "officer_3_amount": "high_comp_officer3_amount",
            "officer_4_name": "high_comp_officer4_full_na",
            "officer_4_amount": "high_comp_officer4_amount",
            "officer_5_name": "high_comp_officer5_full_na",
            "officer_5_amount": "high_comp_officer5_amount",
        }

        logger.info("Getting total rows")

        total_rows = len(award_financial_assistance_data)

        logger.info("Processing " + str(total_rows) +
                    " rows of assistance data")

        # The lists below are filled one entry per row, so they stay parallel to
        # award_financial_assistance_data.
        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        awarding_agency_list = []
        funding_agency_list = []

        logger.info("Getting award objects for {} rows...".format(
            len(award_financial_assistance_data)))
        for row in award_financial_assistance_data:
            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row["awarding_agency_code"] is None or len(
                    row["awarding_agency_code"].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[
                    awarding_subtier_agency_id]["toptier_agency_id"]
                awarding_toptier_code = toptier_agency_map[
                    awarding_toptier_agency_id]
                row["awarding_agency_code"] = awarding_toptier_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row["funding_agency_code"] is None or len(
                    row["funding_agency_code"].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(
                    row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = subtier_to_agency_map[
                        funding_subtier_agency_id]["toptier_agency_id"]
                    funding_toptier_code = toptier_agency_map[
                        funding_toptier_agency_id]
                else:
                    funding_toptier_code = None
                row["funding_agency_code"] = funding_toptier_code

            awarding_agency = Agency.get_by_toptier_subtier(
                row["awarding_agency_code"], row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_toptier_subtier(
                row["funding_agency_code"], row["funding_sub_tier_agency_co"])

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            # Find the award that this award transaction belongs to. save=False defers
            # the write to the bulk_create below.
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                fain=row.get("fain"),
                uri=row.get("uri"),
                generated_unique_award_id=row.get("unique_award_key"),
                save=False,
            )

            award_bulk.append(award)
            # NOTE(review): award.id is read before bulk_create runs; presumably it is
            # still None for awards created above with save=False. Confirm.
            award_update_id_list.append(award.id)
            award_assistance_update_id_list.append(award.id)

        logger.info("Bulk creating {} award rows...".format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info("!!! DUPLICATES FOUND. Continuing... ")

        logger.info("Getting transaction_normalized for {} rows...".format(
            len(award_financial_assistance_data)))
        for row, award, awarding_agency, funding_agency in zip(
                award_financial_assistance_data, award_bulk,
                awarding_agency_list, funding_agency_list):

            parent_txn_value_map = {
                "award":
                award,
                "awarding_agency":
                awarding_agency,
                "funding_agency":
                funding_agency,
                "period_of_performance_start_date":
                format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date":
                format_date(row["period_of_performance_curr"]),
                "action_date":
                format_date(row["action_date"]),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fabs_normalized_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            transaction_normalized = TransactionNormalized.get_or_create_transaction(
                **transaction_dict)
            transaction_normalized.fiscal_year = fy(
                transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info("Bulk creating {} TransactionNormalized rows...".format(
            len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(
                transaction_normalized_bulk)
        except IntegrityError:
            logger.info(
                "Tried and failed to insert duplicate transaction_normalized row. Continuing... "
            )

        for row, transaction_normalized in zip(
                award_financial_assistance_data, transaction_normalized_bulk):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                field_map=fabs_field_map,
                as_dict=True,
            )

            transaction_assistance = TransactionFABS(
                transaction=transaction_normalized,
                **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info("Bulk creating TransactionFABS rows...")
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info("!!! DUPLICATES FOUND. Continuing... ")
Пример #15
0
    def update_transaction_contract(db_cursor,
                                    fiscal_year=None,
                                    page=1,
                                    limit=500000):
        """
        Read one page of ``detached_award_procurement`` rows through ``db_cursor`` and, row by
        row, save the matching Award, TransactionNormalized and TransactionFPDS records.

        Args:
            db_cursor: cursor the query is executed on (the log message calls it the Broker DB).
            fiscal_year: when truthy, only rows with an action_date between 10/01 of the
                previous year and 09/30 of this year are read; when omitted, no date
                filter is applied.
            page: 1-based page number, used to compute the query OFFSET.
            limit: page size, used as the query LIMIT.

        Each row is processed inside its own ``db_transaction.atomic()`` block. An
        IntegrityError raised while saving the TransactionFPDS row is ignored.
        """
        query = "SELECT * FROM detached_award_procurement"
        arguments = []

        if fiscal_year:
            # Fiscal year N is taken to span 10/01/(N-1) through 09/30/N. The bounds are
            # only built when a year is given, so the default None no longer raises.
            query += " WHERE action_date::Date BETWEEN %s AND %s"
            arguments += ["10/01/" + str(fiscal_year - 1), "09/30/" + str(fiscal_year)]
        query += " ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s"
        arguments += [limit, (page - 1) * limit]

        # Interpolate for logging only; works with or without the date filter.
        logger.info("Executing query on Broker DB => " + query % tuple(arguments))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        procurement_data = dictfetchall(db_cursor)

        # model field -> broker column, for TransactionNormalized
        fpds_normalized_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        # model field -> broker column, for TransactionFPDS
        fpds_field_map = {
            "officer_1_name": "high_comp_officer1_full_na",
            "officer_1_amount": "high_comp_officer1_amount",
            "officer_2_name": "high_comp_officer2_full_na",
            "officer_2_amount": "high_comp_officer2_amount",
            "officer_3_name": "high_comp_officer3_full_na",
            "officer_3_amount": "high_comp_officer3_amount",
            "officer_4_name": "high_comp_officer4_full_na",
            "officer_4_amount": "high_comp_officer4_amount",
            "officer_5_name": "high_comp_officer5_full_na",
            "officer_5_amount": "high_comp_officer5_amount",
        }

        logger.info("Getting total rows")

        total_rows = len(procurement_data)

        logger.info("Processing " + str(total_rows) +
                    " rows of procurement data")

        start_time = datetime.now()
        for index, row in enumerate(procurement_data, 1):
            with db_transaction.atomic():

                if not (index % 100):
                    logger.info(
                        "D1 File Load: Loading row {} of {} ({})".format(
                            str(index), str(total_rows),
                            datetime.now() - start_time))

                # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
                # use the sub tier code to look it up. This code assumes that all incoming
                # records will supply an awarding subtier agency code
                if row["awarding_agency_code"] is None or len(
                        row["awarding_agency_code"].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[
                        row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[
                        awarding_subtier_agency_id]["toptier_agency_id"]
                    awarding_toptier_code = toptier_agency_map[
                        awarding_toptier_agency_id]
                    row["awarding_agency_code"] = awarding_toptier_code

                # If funding toptier agency code (aka CGAC) is empty, try using the sub
                # tier funding code to look it up. Unlike the awarding agency, we can't
                # assume that the funding agency subtier code will always be present.
                if row["funding_agency_code"] is None or len(
                        row["funding_agency_code"].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(
                        row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = subtier_to_agency_map[
                            funding_subtier_agency_id]["toptier_agency_id"]
                        funding_toptier_code = toptier_agency_map[
                            funding_toptier_agency_id]
                    else:
                        funding_toptier_code = None
                    row["funding_agency_code"] = funding_toptier_code

                # Find the award that this award transaction belongs to.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row["awarding_agency_code"],
                    row["awarding_sub_tier_agency_c"])
                created, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    piid=row.get("piid"),
                    fain=row.get("fain"),
                    uri=row.get("uri"),
                    parent_award_piid=row.get("parent_award_id"),
                    generated_unique_award_id=row.get("unique_award_key"),
                )
                award.save()

                award_update_id_list.append(award.id)
                award_contract_update_id_list.append(award.id)

                parent_txn_value_map = {
                    "award":
                    award,
                    "awarding_agency":
                    awarding_agency,
                    "funding_agency":
                    Agency.get_by_toptier_subtier(
                        row["funding_agency_code"],
                        row["funding_sub_tier_agency_co"]),
                    "period_of_performance_start_date":
                    format_date(row["period_of_performance_star"]),
                    "period_of_performance_current_end_date":
                    format_date(row["period_of_performance_curr"]),
                    "action_date":
                    format_date(row["action_date"]),
                }

                transaction_dict = load_data_into_model(
                    TransactionNormalized(),  # thrown away
                    row,
                    field_map=fpds_normalized_field_map,
                    value_map=parent_txn_value_map,
                    as_dict=True,
                )

                transaction = TransactionNormalized.get_or_create_transaction(
                    **transaction_dict)
                transaction.save()

                contract_instance = load_data_into_model(
                    TransactionFPDS(),  # thrown away
                    row,
                    field_map=fpds_field_map,
                    as_dict=True,
                )

                transaction_contract = TransactionFPDS(transaction=transaction,
                                                       **contract_instance)
                # catch exception and do nothing if we see
                # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
                # NOTE(review): the error is swallowed inside the atomic() block. Per the
                # Django docs this presumably leaves the row's transaction marked for
                # rollback, discarding the award/transaction saves above too. Confirm
                # that is intended.
                try:
                    transaction_contract.save()
                except IntegrityError:
                    pass
Пример #16
0
def load_file_d2(submission_attributes, award_financial_assistance_data,
                 db_cursor):
    """
    Process and load file D2 broker data (financial assistance award txns).

    For every row this looks up or creates the recipient location, the
    recipient legal entity, the place of performance location and the summary
    award, then stores a Transaction and its TransactionAssistance record.

    Args:
        submission_attributes: submission the rows belong to. It is attached
            to the stored records and supplies reporting_period_start and
            reporting_period_end.
        award_financial_assistance_data: iterable of D2 rows that support
            both ``row[key]`` and ``row.get(key)``. A row is updated in place
            when a missing toptier agency code is back-filled.
        db_cursor: not used by this function.

    Returns:
        None.
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    legal_entity_location_value_map = {"recipient_flag": True}

    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    for row in award_financial_assistance_data:

        # A fresh copy of the value map is handed over on every call, so a
        # helper that mutates its argument cannot leak state between rows.
        legal_entity_location, _ = get_or_create_location(
            legal_entity_location_field_map, row,
            dict(legal_entity_location_value_map))

        # Create the legal entity if it doesn't exist
        try:
            legal_entity = LegalEntity.objects.get(
                recipient_unique_id=row['awardee_or_recipient_uniqu'])
        except ObjectDoesNotExist:
            legal_entity_value_map = {
                "location": legal_entity_location,
                "legal_entity_id": row['awardee_or_recipient_uniqu']
            }
            legal_entity = load_data_into_model(
                LegalEntity(),
                row,
                value_map=legal_entity_value_map,
                save=True)

        # Create the place of performance location
        pop_location, _ = get_or_create_location(
            place_of_performance_field_map, row,
            dict(place_of_performance_value_map))

        # If the awarding toptier agency code (aka CGAC) is not supplied on
        # the D2 record, use the sub tier code to look it up. This assumes
        # every record carries an awarding sub tier code that matches an
        # agency.
        if row['awarding_agency_code'] is None:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # The funding sub tier code is not guaranteed to resolve to an
        # agency; when it does not, the funding code simply stays None
        # instead of failing on the attribute access.
        if row['funding_agency_code'] is None:
            funding_subtier_match = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            if funding_subtier_match is not None:
                row['funding_agency_code'] = (
                    funding_subtier_match.toptier_agency.cgac_code)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        # get_or_create_summary_award returns the pair in (created, award) order.
        _, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        award.save()

        AWARD_UPDATE_ID_LIST.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        # The Transaction() instance is only a template here: with
        # as_dict=True the helper hands back the field values as a dict.
        transaction_values = load_data_into_model(
            Transaction(),
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_instance, _ = Transaction.objects.get_or_create(
            **transaction_values)

        fad_value_map = {
            "transaction":
            transaction_instance,
            "submission":
            submission_attributes,
            "cfda":
            CFDAProgram.objects.filter(
                program_number=row['cfda_number']).first(),
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
        }

        # save=True persists the assistance record; the returned instance is
        # not needed afterwards.
        load_data_into_model(
            TransactionAssistance(),
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            save=True)
Пример #17
0
    def update_awarding_funding_agency(fiscal_year=None,
                                       file_type=None,
                                       page=1,
                                       limit=500000):
        """
        Set the awarding and funding agency on TransactionNormalized rows and
        on their Awards, using the agency codes stored on the matching
        TransactionFPDS (D1) or TransactionFABS (D2) records.

        Args:
            fiscal_year: fiscal year used to filter the FPDS/FABS records
                (through ``transaction__fiscal_year``).
            file_type: 'D1' to read TransactionFPDS, 'D2' to read
                TransactionFABS.
            page: 1-based page of records to process.
            limit: number of records per page.

        Raises:
            ValueError: if file_type is neither 'D1' nor 'D2'.
        """

        offset = (page - 1) * limit

        range_low = offset
        range_high = offset + limit

        # Both sources expose the same columns, so only the model differs.
        if file_type == 'D1':
            source_model = TransactionFPDS
        elif file_type == 'D2':
            source_model = TransactionFABS
        else:
            raise ValueError(
                "file_type must be 'D1' or 'D2', got {!r}".format(file_type))

        # List of dicts mapping transaction ids to cgac and subtier codes,
        # restricted to transactions of the requested fiscal year.
        # NOTE(review): the queryset is sliced without an explicit ordering,
        # so page contents may not be stable between runs - confirm.
        source_records = source_model.objects.filter(
            transaction__fiscal_year=fiscal_year).values(
                'transaction_id', 'awarding_agency_code',
                'funding_agency_code', 'awarding_sub_tier_agency_c',
                'funding_sub_tier_agency_co')[range_low:range_high]

        transaction_cgac_subtier_map = [{
            'transaction_id': record['transaction_id'],
            'awarding_cgac_code': record['awarding_agency_code'],
            'funding_cgac_code': record['funding_agency_code'],
            'awarding_subtier_code': record['awarding_sub_tier_agency_c'],
            'funding_subtier_code': record['funding_sub_tier_agency_co']
        } for record in source_records]

        total_rows = len(transaction_cgac_subtier_map)

        logger.info(
            "Processing {} rows of transaction data".format(total_rows))
        logger.info("Rows range from {} to {}".format(range_low, range_high))

        # Go through each D1 or D2 transaction to update awarding/funding agency
        start_time = datetime.now()
        for index, row in enumerate(transaction_cgac_subtier_map, 1):

            if not (index % 100):
                logger.info(
                    'Updating agencies: Loading row {} of {} ({})'.format(
                        str(index), str(total_rows),
                        datetime.now() - start_time))

            # Find corresponding transaction
            transaction = TransactionNormalized.objects.filter(
                id=row['transaction_id']).first()

            # Skips transaction if unable to find it in Transaction Normalized
            if transaction is None:
                logger.error('Unable to find Transaction {}'.format(
                    str(row['transaction_id'])))
                continue

            # Resolve each agency from the (cgac, subtier) pair first and
            # fall back to the cgac code alone.
            # NOTE(review): agency_no_sub_map and agency_cgac_only_map are not
            # defined inside this function and must come from an enclosing
            # scope - confirm they are available where this runs.
            awarding_agency = agency_no_sub_map.get(
                (row['awarding_cgac_code'], row["awarding_subtier_code"]))

            if awarding_agency is None:
                awarding_agency = agency_cgac_only_map.get(
                    row['awarding_cgac_code'])

            funding_agency = agency_no_sub_map.get(
                (row['funding_cgac_code'], row["funding_subtier_code"]))

            if funding_agency is None:
                funding_agency = agency_cgac_only_map.get(
                    row['funding_cgac_code'])

            # If unable to get agency moves on to the next transaction
            if awarding_agency is None and funding_agency is None:
                logger.error(
                    'Unable to find awarding agency CGAC {} Subtier {} and funding agency CGAC {} Subtier {}'
                    .format(row['awarding_cgac_code'],
                            row['awarding_subtier_code'],
                            row['funding_cgac_code'],
                            row['funding_subtier_code']))
                continue

            # A missing awarding agency is reported; a missing funding agency
            # alone is accepted without a message.
            if awarding_agency is None:
                logger.error(
                    'Unable to find awarding agency for CGAC {} Subtier {}'.
                    format(row['awarding_cgac_code'],
                           row['awarding_subtier_code']))

            transaction.awarding_agency = awarding_agency
            transaction.funding_agency = funding_agency

            award = Award.objects.filter(id=transaction.award.id).first()

            if award is None:
                logger.error('Unable to find Award {}'.format(
                    str(transaction.award.id)))
                continue

            award.awarding_agency = awarding_agency
            award.funding_agency = funding_agency

            # A failed save is logged and the loop moves on, so one bad row
            # does not abort the rest of the page.
            try:
                transaction.save()
                award.save()

            except Exception as e:
                logger.error(
                    'Unable to save Transaction {} and Award {}:{}'.format(
                        str(transaction.id), str(award.id), str(e)))
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False):
    """
    Load broker file D1 rows (contract award transactions).

    With ``quick`` set, the work is handed to the load_file_d1.sql script for
    the submission's broker id and no per-row processing happens here.
    Otherwise every row produces a recipient location, a legal entity, a
    place of performance location, a summary award, a TransactionNormalized
    and a TransactionFPDS record. ``db_cursor`` is not used by this function.
    """

    # Model field -> broker column, for the recipient's location
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4",
    }

    # Model field -> broker column, for the place of performance
    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c",
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}
    legal_entity_location_value_map = {"recipient_flag": True}

    contract_field_map = {"type": "contract_award_type", "description": "award_description"}

    d_start_time = time.time()

    # Quick mode: the SQL script does the whole load.
    if quick:
        run_sql_file(
            'usaspending_api/etl/management/load_file_d1.sql',
            {'broker_submission_id': submission_attributes.broker_submission_id})
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
        return

    row_count = len(procurement_data)
    loop_started = datetime.now()

    for position, row in enumerate(procurement_data, start=1):
        # Progress message on every 100th row
        if position % 100 == 0:
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                str(position), str(row_count), datetime.now() - loop_started))

        recipient_location, _ = get_or_create_location(
            legal_entity_location_field_map, row, copy(legal_entity_location_value_map))

        # A missing recipient name is stored as an empty string
        raw_name = row['awardee_or_recipient_legal']
        recipient_name = "" if raw_name is None else raw_name

        # Fetch the legal entity, creating and populating it when it is new
        legal_entity, entity_is_new = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)
        if entity_is_new:
            legal_entity = load_data_into_model(
                legal_entity, row, value_map={"location": recipient_location}, save=True)

        # Place of performance location
        pop_location, _ = get_or_create_location(
            place_of_performance_field_map, row, copy(place_of_performance_value_map))

        # Blank awarding toptier code (aka CGAC): derive it from the sub tier
        # code. Every incoming record is assumed to carry an awarding sub
        # tier agency code.
        awarding_code = row['awarding_agency_code']
        if awarding_code is None or not awarding_code.strip():
            awarding_match = Agency.get_by_subtier(row["awarding_sub_tier_agency_c"])
            row['awarding_agency_code'] = awarding_match.toptier_agency.cgac_code

        # Blank funding toptier code: same lookup, but the funding sub tier
        # code may be absent, in which case the code becomes None.
        funding_code = row['funding_agency_code']
        if funding_code is None or not funding_code.strip():
            funding_match = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            if funding_match is None:
                row['funding_agency_code'] = None
            else:
                row['funding_agency_code'] = funding_match.toptier_agency.cgac_code

        # Summary award this transaction belongs to (created when missing)
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        _, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            # It is a FAIN/PIID/URI, not our db's pk
            parent_award_piid=row.get('parent_award_id'))
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'], row["funding_sub_tier_agency_co"])
        normalized_values = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        # The model instance passed in is only a template and is thrown away;
        # as_dict=True returns the field values instead.
        transaction_dict = load_data_into_model(
            TransactionNormalized(),
            row,
            field_map=contract_field_map,
            value_map=normalized_values,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        fpds_values = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e']),
        }

        # Same template pattern for the FPDS record
        contract_instance = load_data_into_model(
            TransactionFPDS(),
            row,
            field_map=contract_field_map,
            value_map=fpds_values,
            as_dict=True)

        TransactionFPDS(transaction=transaction, **contract_instance).save()

    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))