Exemplo n.º 1
0
    def load_transaction_normalized(self, fabs_broker_data, total_rows):
        start_time = datetime.now()
        for index, row in enumerate(fabs_broker_data, 1):
            if not (index % 10000):
                logger.info(
                    'Transaction Normalized: Loading row {} of {} ({})'.format(
                        str(index), str(total_rows),
                        datetime.now() - start_time))
            parent_txn_value_map = {
                "award":
                award_lookup[index - 1],
                "awarding_agency":
                awarding_agency_list[index - 1],
                "funding_agency":
                funding_agency_list[index - 1],
                "recipient":
                legal_entity_lookup[index - 1],
                "place_of_performance":
                pop_bulk[index - 1],
                "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
                "action_date":
                format_date(row['action_date']),
                "last_modified_date":
                row['modified_at']
            }

            fad_field_map = {
                "type": "assistance_type",
                "description": "award_description",
            }

            transaction_normalized = load_data_into_model(
                TransactionNormalized(),
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=False,
                save=False)

            transaction_normalized.fiscal_year = fy(
                transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info(
            'Bulk creating Transaction Normalized (batch_size: {})...'.format(
                BATCH_SIZE))
        TransactionNormalized.objects.bulk_create(transaction_normalized_bulk,
                                                  batch_size=BATCH_SIZE)
Exemplo n.º 2
0
 def totals(self):
     outlays = defaultdict(Decimal)
     obligations = defaultdict(Decimal)
     budget_authority = defaultdict(Decimal)
     for ab in self.account_balances.all():
         fiscal_year = fy(ab.reporting_period_start)
         budget_authority[
             fiscal_year] += ab.budget_authority_appropriated_amount_cpe
         outlays[fiscal_year] += ab.gross_outlay_amount_by_tas_cpe
         obligations[
             fiscal_year] += ab.obligations_incurred_total_by_tas_cpe
     results = {
         'outgoing': {
             'outlays': outlays,
             'obligations': obligations,
             'budget_authority': budget_authority,
         },
         'incoming': {}
     }
     return results
Exemplo n.º 3
0
    def load_quarterly_spreadsheets(self, quarter, results, overall_totals):
        """Special procedure for getting quarterly update .xls files

        These are downloaded from
        MAX Information and Reports (Executive, Legislative, and Judicial Users)
        https://max.omb.gov/maxportal/document/SF133/Budget/FY%202017%20-%20SF%20133%20Reports%20on%20Budget%20Execution%20and%20Budgetary%20Resources.html
        """

        quarterly_path = os.path.join(self.directory, 'quarterly', '*.xls')
        this_fy = fy(date.today())
        overall_totals[this_fy] = 0
        amount_column = 'Q{}_AMT'.format(quarter)
        for filename in glob.glob(quarterly_path):
            workbook = open_workbook(filename)
            sheet = workbook.sheets()[0]
            headers = [cell.value for cell in sheet.row(0)]
            for i in range(1, sheet.nrows):
                row = dict(zip(headers, (cell.value for cell in sheet.row(i))))
                if row['LNO'] == '2500':
                    dollars = int(row[amount_column]) * 1000
                    results[(row['TRAG'], None, this_fy)] = dollars
                    overall_totals[this_fy] += dollars
Exemplo n.º 4
0
 def totals_program_activity(self):
     results = []
     for pa in self.program_activities:
         obligations = defaultdict(Decimal)
         outlays = defaultdict(Decimal)
         for pb in self.program_balances.filter(program_activity=pa):
             reporting_fiscal_year = fy(
                 pb.submission.reporting_period_start)
             # TODO: once it is present, use the reporting_fiscal_year directly
             obligations[
                 reporting_fiscal_year] += pb.obligations_incurred_by_program_object_class_cpe
             outlays[
                 reporting_fiscal_year] += pb.gross_outlay_amount_by_program_object_class_cpe
         result = {
             'id': pa.id,
             'program_activity_name': pa.program_activity_name,
             'program_activity_code': pa.program_activity_code,
             'obligations': obligations,
             'outlays': outlays,
         }
         results.append(result)
     return results
Exemplo n.º 5
0
 def totals_object_class(self):
     results = []
     for object_class in self.object_classes:
         obligations = defaultdict(Decimal)
         outlays = defaultdict(Decimal)
         for pb in self.program_balances.filter(object_class=object_class):
             reporting_fiscal_year = fy(
                 pb.submission.reporting_period_start)
             obligations[
                 reporting_fiscal_year] += pb.obligations_incurred_by_program_object_class_cpe
             outlays[
                 reporting_fiscal_year] += pb.gross_outlay_amount_by_program_object_class_cpe
         result = {
             'major_object_class_code': None,
             'major_object_class_name':
             None,  # TODO: enable once ObjectClass populated
             'object_class': object_class.object_class,  # TODO: remove
             'outlays': obligations,
             'obligations': outlays,
         }
         results.append(result)
     return results
Exemplo n.º 6
0
    def insert_new_fpds(self, to_insert, total_rows):
        logger.info('Starting insertion of new FPDS data')

        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5"
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5"
        }

        start_time = datetime.now()

        for index, row in enumerate(to_insert, 1):
            if not (index % 1000):
                logger.info(
                    'Inserting Stale FPDS: Inserting row {} of {} ({})'.format(
                        str(index), str(total_rows),
                        datetime.now() - start_time))

            for key in row:
                if isinstance(row[key], str):
                    row[key] = row[key].upper()

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {
                    "recipient_flag": True,
                    "is_fpds": True
                })
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name
                if recipient_name is not None else "")
            legal_entity_value_map = {
                "location":
                legal_entity_location,
                "business_categories":
                get_business_categories(row=row, data_type='fpds'),
                "is_fpds":
                True
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row,
                                           {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(
                row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(
                row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
            generated_unique_id = 'CONT_AW_' + (row['agency_id'] if row['agency_id'] else '-NONE-') + '_' + \
                (row['referenced_idv_agency_iden'] if row['referenced_idv_agency_iden'] else '-NONE-') + '_' + \
                (row['piid'] if row['piid'] else '-NONE-') + '_' + \
                (row['parent_award_id'] if row['parent_award_id'] else '-NONE-')

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                piid=row['piid'])
            award.parent_award_piid = row.get('parent_award_id')
            award.save()

            # Append row to list of Awards updated
            award_update_id_list.append(award.id)

            try:
                last_mod_date = datetime.strptime(str(
                    row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(str(row['last_modified']),
                                                  "%Y-%m-%d %H:%M:%S").date()
            parent_txn_value_map = {
                "award":
                award,
                "awarding_agency":
                awarding_agency,
                "funding_agency":
                funding_agency,
                "recipient":
                legal_entity,
                "place_of_performance":
                pop_location,
                "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
                "action_date":
                format_date(row['action_date']),
                "last_modified_date":
                last_mod_date,
                "transaction_unique_id":
                row['detached_award_proc_unique'],
                "generated_unique_award_id":
                generated_unique_id,
                "is_fpds":
                True
            }

            contract_field_map = {
                "type": "contract_award_type",
                "type_description": "contract_award_type_desc",
                "description": "award_description"
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                as_dict=True)

            detached_award_proc_unique = contract_instance[
                'detached_award_proc_unique']
            unique_fpds = TransactionFPDS.objects.filter(
                detached_award_proc_unique=detached_award_proc_unique)

            if unique_fpds.first():
                transaction_normalized_dict["update_date"] = datetime.utcnow()
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).\
                    update(**transaction_normalized_dict)

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(
                    **transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction,
                                                   **contract_instance)
                transaction_fpds.save()
Exemplo n.º 7
0
 def get_date_signed__fy(self, obj):
     return fy(obj.date_signed)
    def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFABS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += ' action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin]
            arguments += [fy_end]
        query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
        arguments += [limit, (page-1)*limit]

        logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1],
                                                                  arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "county_code": "legal_entity_county_code",
            "county_name": "legal_entity_county_name",
            "foreign_city_name": "legal_entity_foreign_city",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "zip5": "legal_entity_zip5",
            "zip_last4": "legal_entity_zip_last4",
            "location_country_code": "legal_entity_country_code"
        }

        place_of_performance_field_map = {
            "city_name": "place_of_performance_city",
            "performance_code": "place_of_performance_code",
            "congressional_code": "place_of_performance_congr",
            "county_name": "place_of_perform_county_na",
            "foreign_location_description": "place_of_performance_forei",
            "state_name": "place_of_perform_state_nam",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"

        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        logger.info("Getting total rows")
        # rows_loaded = len(current_ids)
        total_rows = len(award_financial_assistance_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) + " rows of assistance data")

        # skip_count = 0


# ROW ITERATION STARTS HERE

        lel_bulk = []
        pop_bulk = []
        legal_entity_bulk = []
        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Recipient flag is true for LeL
            legal_entity_location = get_or_create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
            )

            lel_bulk.append(legal_entity_location)

        logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
        try:
            Location.objects.bulk_create(lel_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Place of Performance flag is true for PoP
            pop_location = get_or_create_location(
                place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
            )

            pop_bulk.append(pop_location)

        logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
        try:
            Location.objects.bulk_create(pop_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            recipient_name = row.get('awardee_or_recipient_legal', '')

            legal_entity = LegalEntity.objects.filter(recipient_unique_id=row['awardee_or_recipient_uniqu'],
                                                      recipient_name=recipient_name).first()

            if legal_entity is None:
                legal_entity = LegalEntity(recipient_unique_id=row['awardee_or_recipient_uniqu'],
                                           recipient_name=recipient_name)
                legal_entity_value_map = {
                    "location": lel_bulk[index - 1],
                }
                legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False)

            legal_entity_bulk.append(legal_entity)

        logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
        try:
            LegalEntity.objects.bulk_create(legal_entity_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        awarding_agency_list = []
        funding_agency_list = []

        logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

                # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
                # use the sub tier code to look it up. This code assumes that all incoming
                # records will supply an awarding subtier agency code
                if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                    awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                    row['awarding_agency_code'] = awarding_cgac_code

                # If funding toptier agency code (aka CGAC) is empty, try using the sub
                # tier funding code to look it up. Unlike the awarding agency, we can't
                # assume that the funding agency subtier code will always be present.
                if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = \
                            subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                        funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                    else:
                        funding_cgac_code = None
                    row['funding_agency_code'] = funding_cgac_code

                # Find the award that this award transaction belongs to. If it doesn't exist, create it.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row['awarding_agency_code'],
                    row["awarding_sub_tier_agency_c"]
                )
                funding_agency = Agency.get_by_toptier_subtier(
                    row['funding_agency_code'],
                    row["funding_sub_tier_agency_co"]
                )

                awarding_agency_list.append(awarding_agency)
                funding_agency_list.append(funding_agency)

                # award.save() is called in Award.get_or_create_summary_award by default
                created, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    fain=row.get('fain'),
                    uri=row.get('uri'),
                    save=False
                )

                award_bulk.append(award)
                award_update_id_list.append(award.id)

        logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            parent_txn_value_map = {
                "award": award_bulk[index - 1],
                "awarding_agency": awarding_agency_list[index - 1],
                "funding_agency": funding_agency_list[index - 1],
                "recipient": legal_entity_bulk[index - 1],
                "place_of_performance": pop_bulk[index - 1],
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
            transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
        except IntegrityError:
            logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing... ')

        for index, row in enumerate(award_financial_assistance_data, 1):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            transaction_assistance = TransactionFABS(transaction=transaction_normalized_bulk[index - 1],
                                                     **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info('Bulk creating TransactionFABS rows...')
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')
Exemplo n.º 9
0
def test_fy_none():
    assert fy(None) is None
Exemplo n.º 10
0
def test_fy_type_exceptions(not_date):
    with pytest.raises(TypeError):
        fy(not_date)
Exemplo n.º 11
0
def test_fy_returns_correct(raw_date, expected_fy):
    assert fy(raw_date) == expected_fy
Exemplo n.º 12
0
def test_fy_returns_integer(raw_date, expected_fy):
    assert isinstance(fy(raw_date), int)
    def handle(self, *args, **options):

        h.clear_caches()

        csv_file = options['file'][0]
        self.logger.info("Starting load for file {}".format(csv_file))

        # Create the csv reader
        reader = CsvDataReader(csv_file)

        # Create a new submission attributes object for this timestamp
        subattr = SubmissionAttributes()
        subattr.usaspending_update = datetime.now()
        subattr.save()

        # Create lists to hold model instances for bulk insert
        txn_list = []
        txn_assistance_list = []

        for idx, row in enumerate(reader):
            if len(reader) % 1000 == 0:
                self.logger.info("Read row {}".format(len(reader)))
            row = h.cleanse_values(row)

            awarding_agency = self.get_awarding_agency(
                row)  # todo: use agency dict?

            # Create the transaction object for this row
            txn_dict = {
                "submission":
                subattr,
                "action_date":
                h.convert_date(row['obligation_action_date']),
                "action_type":
                h.up2colon(row['action_type']),
                "award":
                self.get_or_create_award(row, awarding_agency=awarding_agency),
                "awarding_agency":
                awarding_agency,
                "description":
                row["project_description"],  # ?? account_title is anther contender?
                "data_source":
                "USA",
                "federal_action_obligation":
                row["fed_funding_amount"],
                "last_modified_date":
                h.convert_date(row['last_modified_date']),
                "modification_number":
                row["federal_award_mod"],  # ??
                "period_of_performance_start_date":
                h.convert_date(row['starting_date']),
                "period_of_performance_current_end_date":
                h.convert_date(row['ending_date']),
                "place_of_performance":
                h.get_or_create_location(
                    row, location_mapper_fin_assistance_principal_place),
                "recipient":
                self.get_or_create_recipient(row),
                "type":
                h.up2colon(row['assistance_type']),
                "usaspending_unique_transaction_id":
                row["unique_transaction_id"],

                # ??"funding_agency_id":
                # ?? "certified date":
            }
            txn = Transaction(**txn_dict)
            txn.fiscal_year = fy(txn.action_date)
            txn_list.append(txn)

            # Create the transaction contract object for this row
            txn_assistance_dict = {
                "submission":
                subattr,
                "fain":
                row["federal_award_id"],
                "uri":
                row["uri"],
                "cfda":
                Cfda.objects.filter(
                    program_number=row["cfda_program_num"]).first(),
                "correction_late_delete_indicator":
                h.up2colon(row['correction_late_ind']),
                "face_value_loan_guarantee":
                row["face_loan_guran"],
                "fiscal_year_and_quarter_correction":
                row["fyq_correction"],
                "non_federal_funding_amount":
                row["non_fed_funding_amount"],
                "original_loan_subsidy_cost":
                row["orig_sub_guran"],  # ??
                "record_type":
                int(h.up2colon(row['record_type'])),
                "sai_number":
                row["sai_number"],
                "submitted_type":
                "C",  # ?? For CSV?
            }
            # ?? business_funds_indicator
            # ?? reporting period start/end??

            txn_assistance = TransactionAssistance(**txn_assistance_dict)
            txn_assistance_list.append(txn_assistance)

        # Bulk insert transaction rows
        self.logger.info(
            "Starting Transaction bulk insert ({} records)".format(
                len(txn_list)))
        Transaction.objects.bulk_create(txn_list)
        self.logger.info("Completed Transaction bulk insert")
        # Update txn assistance list with newly-inserted transactions
        award_id_list = []  # we'll need this when updating the awards later on
        for idx, t in enumerate(txn_assistance_list):
            t.transaction = txn_list[idx]
            award_id_list.append(txn_list[idx].award_id)
        # Bulk insert transaction assistance rows
        self.logger.info(
            "Starting TransactionAssistance bulk insert ({} records)".format(
                len(txn_assistance_list)))
        TransactionAssistance.objects.bulk_create(txn_assistance_list)
        self.logger.info("Completed TransactionAssistance bulk insert")

        # Update awards to reflect latest transaction information
        # (note that this can't be done via signals or a save()
        # override in the model itself, because those aren't
        # triggered by a bulk update
        self.logger.info("Starting Awards update")
        count = update_awards(tuple(award_id_list))
        update_contract_awards(tuple(award_id_list))
        update_model_description_fields()
        self.logger.info("Completed Awards update ({} records)".format(count))
Exemplo n.º 14
0
    def insert_new_fabs(self, to_insert, total_rows):
        logger.info('Starting insertion of new FABS data')

        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perform_country_n",
            "state_code": "place_of_perfor_state_code",
            "state_name": "place_of_perform_state_nam",
            "city_name": "place_of_performance_city",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "foreign_location_description": "place_of_performance_forei",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "performance_code": "place_of_performance_code",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5"
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "city_name": "legal_entity_city_name",
            "city_code": "legal_entity_city_code",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "foreign_location_description": "legal_entity_foreign_descr",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "foreign_city_name": "legal_entity_foreign_city"
        }

        start_time = datetime.now()

        for index, row in enumerate(to_insert, 1):
            if not (index % 1000):
                logger.info('Inserting Stale FABS: Inserting row {} of {} ({})'.format(str(index), str(total_rows),
                                                                                       datetime.now() - start_time))

            for key in row:
                if isinstance(row[key], str):
                    row[key] = row[key].upper()

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True})
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name if recipient_name is not None else ""
            )
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type='fabs'),
                "business_types_description": get_business_type_description(row['business_types'])
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

            # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
            # non-numeric record type values
            record_type_int = int(row['record_type'])
            if record_type_int == 1:
                uri = row['uri'] if row['uri'] else '-NONE-'
                fain = '-NONE-'
            elif record_type_int == 2:
                uri = '-NONE-'
                fain = row['fain'] if row['fain'] else '-NONE-'
            else:
                raise Exception('Invalid record type encountered for the following afa_generated_unique record: %s' %
                                row['afa_generated_unique'])

            generated_unique_id = 'ASST_AW_' +\
                (row['awarding_sub_tier_agency_c'] if row['awarding_sub_tier_agency_c'] else '-NONE-') + '_' + \
                fain + '_' + uri

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(generated_unique_award_id=generated_unique_id,
                                                                 fain=row['fain'],
                                                                 uri=row['uri'],
                                                                 record_type=row['record_type'])
            award.save()

            # Append row to list of Awards updated
            award_update_id_list.append(award.id)

            try:
                last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()
            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
                "last_modified_date": last_mod_date,
                "type_description": get_assistance_type_description(row['assistance_type']),
                "transaction_unique_id": row['afa_generated_unique'],
                "generated_unique_award_id": generated_unique_id
            }

            fad_field_map = {
                "type": "assistance_type",
                "description": "award_description",
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            afa_generated_unique = financial_assistance_data['afa_generated_unique']
            unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

            if unique_fabs.first():
                transaction_normalized_dict["update_date"] = datetime.utcnow()
                transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

                # Update TransactionNormalized
                TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).\
                    update(**transaction_normalized_dict)

                # Update TransactionFABS
                unique_fabs.update(**financial_assistance_data)
            else:
                # Create TransactionNormalized
                transaction = TransactionNormalized(**transaction_normalized_dict)
                transaction.save()

                # Create TransactionFABS
                transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data)
                transaction_fabs.save()
Exemplo n.º 15
0
 def save(self, *args, **kwargs):
     self.fiscal_year = fy(self.action_date)
     super().save(*args, **kwargs)
Exemplo n.º 16
0
    def handle(self, *args, **options):

        h.clear_caches()

        csv_file = options['file'][0]
        self.logger.info("Starting load for file {}".format(csv_file))

        # Create the csv reader
        reader = CsvDataReader(csv_file)

        # Create a new submission attributes object for this timestamp
        subattr = SubmissionAttributes()
        subattr.usaspending_update = datetime.now()
        subattr.save()

        # Create lists to hold model instances for bulk insert
        txn_list = []
        txn_contract_list = []

        subtier_agency_dict = h.get_subtier_agency_dict()

        for idx, row in enumerate(reader):
            if len(reader) % 1000 == 0:
                self.logger.info("Read row {}".format(len(reader)))
            row = h.cleanse_values(row)

            awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

            # Create the transaction object for this row
            txn_dict = {
                "action_date": h.convert_date(row['signeddate']),
                "award": self.get_or_create_award(row, awarding_agency_id),
                "awarding_agency_id": awarding_agency_id,
                "data_source": "USA",
                "description": row["descriptionofcontractrequirement"],
                "federal_action_obligation": row["dollarsobligated"],
                "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
                "last_modified_date": h.convert_date(row['last_modified_date']),
                "modification_number": row["modnumber"],
                "place_of_performance": h.get_or_create_location(
                    row, mapper=location_mapper_place_of_performance),
                "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
                "period_of_performance_start_date": h.convert_date(row['effectivedate']),
                "recipient": self.get_or_create_recipient(row),
                "submission": subattr,
                "type": evaluate_contract_award_type(row),
                "action_type": h.up2colon(row['reasonformodification']),
                "usaspending_unique_transaction_id": row["unique_transaction_id"]
            }
            txn = Transaction(**txn_dict)
            txn.fiscal_year = fy(txn.action_date)
            txn_list.append(txn)

            # Create the transaction contract object for this row
            txn_contract_dict = {
                "submission": subattr,
                "piid": row['piid'],
                "parent_award_id": row['idvpiid'],
                "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
                "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
                "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
                "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
                "gfe_gfp": h.up2colon(row['gfe_gfp']),
                "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
                "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
                "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
                "naics": h.up2colon(row['nationalinterestactioncode']),
                "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
                "commercial_item_acquisition_procedures": h.up2colon(
                    row['commercialitemacquisitionprocedures']),
                "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
                "consolidated_contract": h.up2colon(row['consolidatedcontract']),
                "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                    row['contingencyhumanitarianpeacekeepingoperation']),
                "contract_bundling": h.up2colon(row['contractbundling']),
                "contract_financing": h.up2colon(row['contractfinancing']),
                "contracting_officers_determination_of_business_size": h.up2colon(
                    row['contractingofficerbusinesssizedetermination']),
                "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
                "davis_bacon_act": h.up2colon(row['davisbaconact']),
                "evaluated_preference": h.up2colon(row['evaluatedpreference']),
                "extent_competed": h.up2colon(row['extentcompeted']),
                "information_technology_commercial_item_category": h.up2colon(
                    row['informationtechnologycommercialitemcategory']),
                "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
                "local_area_set_aside": h.up2colon(row['localareasetaside']),
                "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
                "multi_year_contract": h.up2colon(row['multiyearcontract']),
                "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
                "number_of_actions": h.up2colon(row['numberofactions']),
                "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
                "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
                "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
                "product_or_service_code": h.up2colon(row['productorservicecode']),
                "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
                "research": h.up2colon(row['research']),
                "sea_transportation": h.up2colon(row['seatransportation']),
                "service_contract_act": h.up2colon(row['servicecontractact']),
                "small_business_competitiveness_demonstration_program": self.parse_first_character(
                    row['smallbusinesscompetitivenessdemonstrationprogram']),
                "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
                "subcontracting_plan": h.up2colon(row['subcontractplan']),
                "type_set_aside": h.up2colon(row['typeofsetaside']),
                "walsh_healey_act": h.up2colon(row['walshhealyact']),
                "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
                "type_of_idc": self.parse_first_character(row['typeofidc']),
                "a76_fair_act_action": self.parse_first_character(row['a76action']),
                "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
                "cost_accounting_standards": self.parse_first_character(
                    row['costaccountingstandardsclause']),
                "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
                "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
                "major_program": self.parse_first_character(row['majorprogramcode']),
                "program_acronym": self.parse_first_character(row['programacronym']),
                "referenced_idv_modification_number": self.parse_first_character(
                    row['idvmodificationnumber']),
                "transaction_number": self.parse_first_character(row['transactionnumber']),
                "solicitation_identifier": self.parse_first_character(row['solicitationid'])
            }
            txn_contract = TransactionContract(**txn_contract_dict)
            txn_contract_list.append(txn_contract)

        # Bulk insert transaction rows
        self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
        Transaction.objects.bulk_create(txn_list)
        self.logger.info("Completed Transaction bulk insert")
        # Update txn contract list with newly-inserted transactions
        award_id_list = []  # we'll need this when updating the awards later on
        for idx, t in enumerate(txn_contract_list):
            # add transaction info to this TransactionContract object
            t.transaction = txn_list[idx]
            # add the corresponding award id to a list we'll use when batch-updating award data
            award_id_list.append(txn_list[idx].award_id)
        # Bulk insert transaction contract rows
        self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
        TransactionContract.objects.bulk_create(txn_contract_list)
        self.logger.info("Completed TransactionContract bulk insert")

        # Update awards to reflect latest transaction information
        # (note that this can't be done via signals or a save()
        # override in the model itself, because those aren't
        # triggered by a bulk update
        self.logger.info("Starting Awards update")
        count = update_awards(tuple(award_id_list))
        update_contract_awards(tuple(award_id_list))
        update_model_description_fields()
        self.logger.info("Completed Awards update ({} records)".format(count))