Python up2colon示例，usaspending_api.etl.helpers.up2colon Python示例

示例#1

0

显示文件

def location_mapper_vendor(row):
    """Map a row's vendor address columns onto location field names.

    Each column is read with ``row.get`` and falls back to an empty string
    when it is absent. Returns a dict keyed by location field name.
    """
    city = row.get("city", "")
    # Need to add leading zeroes here
    district = row.get("vendor_cd", "").zfill(2)
    # Never actually a country code, just the string name
    country = row.get("vendorcountrycode", "")
    # Hyphens are removed from the ZIP value
    zip_code = row.get("zipcode", "").replace("-", "")
    state = h.up2colon(row.get("vendor_state_code", ""))
    street1 = row.get("streetaddress", "")
    street2 = row.get("streetaddress2", "")
    street3 = row.get("streetaddress3", "")

    return {
        "city_name": city,
        "congressional_code": district,
        "location_country_code": country,
        "location_zip": zip_code,
        "state_code": state,
        "address_line1": street1,
        "address_line2": street2,
        "address_line3": street3,
    }

示例#2

0

显示文件

文件： load_usaspending_contracts.py 项目： fedspendingtransparency/usaspending-api

def location_mapper_vendor(row):
    """Map a row's vendor address columns onto location field names.

    Each column is read with ``row.get`` and falls back to an empty string
    when it is absent. Returns a dict keyed by location field name.
    """
    city = row.get("city", "")
    # Need to add leading zeroes here
    district = row.get("vendor_cd", "").zfill(2)
    # Never actually a country code, just the string name
    country = row.get("vendorcountrycode", "")
    # Hyphens are removed from the ZIP value
    zip_code = row.get("zipcode", "").replace("-", "")
    state = h.up2colon(row.get("vendor_state_code", ""))
    street1 = row.get("streetaddress", "")
    street2 = row.get("streetaddress2", "")
    street3 = row.get("streetaddress3", "")

    return {
        "city_name": city,
        "congressional_code": district,
        "location_country_code": country,
        "location_zip": zip_code,
        "state_code": state,
        "address_line1": street1,
        "address_line2": street2,
        "address_line3": street3,
    }

示例#3

0

显示文件

def location_mapper_place_of_performance(row):
    """Map a row's place-of-performance columns onto location field names.

    Each column is read with ``row.get`` and falls back to an empty string
    when it is absent. Returns a dict keyed by location field name.
    """
    city = row.get("placeofperformancecity", "")
    # Need to strip the state off the front
    district = row.get("placeofperformancecongressionaldistrict", "")[2:]
    country = row.get("placeofperformancecountrycode", "")
    # Either ZIP5, or ZIP5+4, sometimes with hyphens
    zip_code = row.get("placeofperformancezipcode", "").replace("-", "")
    # Format is VA: VIRGINIA, so we need to grab the first bit
    state = h.up2colon(row.get("pop_state_code", ""))

    return {
        "city_name": city,
        "congressional_code": district,
        "location_country_code": country,
        "location_zip": zip_code,
        "state_code": state,
    }

示例#4

0

显示文件

文件： load_usaspending_contracts.py 项目： fedspendingtransparency/usaspending-api

def location_mapper_place_of_performance(row):
    """Map a row's place-of-performance columns onto location field names.

    Each column is read with ``row.get`` and falls back to an empty string
    when it is absent. Returns a dict keyed by location field name.
    """
    city = row.get("placeofperformancecity", "")
    # Need to strip the state off the front
    district = row.get("placeofperformancecongressionaldistrict", "")[2:]
    country = row.get("placeofperformancecountrycode", "")
    # Either ZIP5, or ZIP5+4, sometimes with hyphens
    zip_code = row.get("placeofperformancezipcode", "").replace("-", "")
    # Format is VA: VIRGINIA, so we need to grab the first bit
    state = h.up2colon(row.get("pop_state_code", ""))

    return {
        "city_name": city,
        "congressional_code": district,
        "location_country_code": country,
        "location_zip": zip_code,
        "state_code": state,
    }

示例#5

0

显示文件

def evaluate_contract_award_type(row):
    """Return the single-letter award type code for a contract row.

    The first whitespace-separated token of ``row['contractactiontype']`` is
    passed through ``h.up2colon``; if the result is exactly one character it
    is returned as the code. Otherwise the whole value is lower-cased and
    searched for known keywords, checked in a fixed order.

    Returns:
        The one-character code, or ``None`` when the value is missing,
        blank, or matches no known keyword.

    Raises:
        KeyError: if ``row`` has no ``'contractactiontype'`` entry.
    """
    action_type = row['contractactiontype']
    # A missing or blank value has no first token; report it as unknown
    # instead of failing on ``.split()[0]``.
    if not action_type or not action_type.strip():
        return None

    first_element = h.up2colon(action_type.split()[0])
    if len(first_element) == 1:
        return first_element

    cat = action_type.lower()
    # Not using DAIMS enumeration . . .
    # Order matters: the first keyword found in the value wins.
    keyword_codes = (
        ('bpa', 'A'),
        ('purchase', 'B'),
        ('delivery', 'C'),
        ('definitive', 'D'),
    )
    for keyword, code in keyword_codes:
        if keyword in cat:
            return code
    return None

示例#6

0

显示文件

文件： load_usaspending_contracts.py 项目： fedspendingtransparency/usaspending-api

def evaluate_contract_award_type(row):
    """Return the single-letter award type code for a contract row.

    The first whitespace-separated token of ``row['contractactiontype']`` is
    passed through ``h.up2colon``; if the result is exactly one character it
    is returned as the code. Otherwise the whole value is lower-cased and
    searched for known keywords, checked in a fixed order.

    Returns:
        The one-character code, or ``None`` when the value is missing,
        blank, or matches no known keyword.

    Raises:
        KeyError: if ``row`` has no ``'contractactiontype'`` entry.
    """
    action_type = row['contractactiontype']
    # A missing or blank value has no first token; report it as unknown
    # instead of failing on ``.split()[0]``.
    if not action_type or not action_type.strip():
        return None

    first_element = h.up2colon(action_type.split()[0])
    if len(first_element) == 1:
        return first_element

    cat = action_type.lower()
    # Not using DAIMS enumeration . . .
    # Order matters: the first keyword found in the value wins.
    keyword_codes = (
        ('bpa', 'A'),
        ('purchase', 'B'),
        ('delivery', 'C'),
        ('definitive', 'D'),
    )
    for keyword, code in keyword_codes:
        if keyword in cat:
            return code
    return None

示例#7

0

显示文件

文件： load_usaspending_assistance.py 项目： codestaruser/usaspending-api

    def handle(self, *args, **options):
        """Load financial assistance transactions from a CSV file.

        Reads the path in ``options['file'][0]``, builds one ``Transaction``
        and one ``TransactionAssistance`` per row, bulk-inserts both lists,
        and then runs the award update helpers for the affected award ids.
        """

        h.clear_caches()

        csv_file = options['file'][0]
        self.logger.info("Starting load for file {}".format(csv_file))

        # Create the csv reader
        reader = CsvDataReader(csv_file)

        # Create a new submission attributes object for this timestamp
        subattr = SubmissionAttributes()
        subattr.usaspending_update = datetime.now()
        subattr.save()

        # Create lists to hold model instances for bulk insert.
        # The two lists are kept in lockstep: entry i of each belongs to
        # the same CSV row, and they are paired by index further down.
        txn_list = []
        txn_assistance_list = []

        for idx, row in enumerate(reader):
            # NOTE(review): progress is keyed on len(reader), not idx (which
            # is unused in this loop). This only logs periodically if
            # len(reader) changes as rows are consumed — confirm against
            # CsvDataReader.
            if len(reader) % 1000 == 0:
                self.logger.info("Read row {}".format(len(reader)))
            row = h.cleanse_values(row)

            awarding_agency = self.get_awarding_agency(
                row)  # todo: use agency dict?

            # Create the transaction object for this row
            txn_dict = {
                "submission":
                subattr,
                "action_date":
                h.convert_date(row['obligation_action_date']),
                "action_type":
                h.up2colon(row['action_type']),
                "award":
                self.get_or_create_award(row, awarding_agency=awarding_agency),
                "awarding_agency":
                awarding_agency,
                "description":
                row["project_description"],  # ?? account_title is another contender?
                "data_source":
                "USA",
                "federal_action_obligation":
                row["fed_funding_amount"],
                "last_modified_date":
                h.convert_date(row['last_modified_date']),
                "modification_number":
                row["federal_award_mod"],  # ??
                "period_of_performance_start_date":
                h.convert_date(row['starting_date']),
                "period_of_performance_current_end_date":
                h.convert_date(row['ending_date']),
                "place_of_performance":
                h.get_or_create_location(
                    row, location_mapper_fin_assistance_principal_place),
                "recipient":
                self.get_or_create_recipient(row),
                "type":
                h.up2colon(row['assistance_type']),
                "usaspending_unique_transaction_id":
                row["unique_transaction_id"],

                # ??"funding_agency_id":
                # ?? "certified date":
            }
            txn = Transaction(**txn_dict)
            # Fiscal year is derived from the action date set above
            txn.fiscal_year = fy(txn.action_date)
            txn_list.append(txn)

            # Create the transaction assistance object for this row
            txn_assistance_dict = {
                "submission":
                subattr,
                "fain":
                row["federal_award_id"],
                "uri":
                row["uri"],
                "cfda":
                Cfda.objects.filter(
                    program_number=row["cfda_program_num"]).first(),
                "correction_late_delete_indicator":
                h.up2colon(row['correction_late_ind']),
                "face_value_loan_guarantee":
                row["face_loan_guran"],
                "fiscal_year_and_quarter_correction":
                row["fyq_correction"],
                "non_federal_funding_amount":
                row["non_fed_funding_amount"],
                "original_loan_subsidy_cost":
                row["orig_sub_guran"],  # ??
                "record_type":
                int(h.up2colon(row['record_type'])),
                "sai_number":
                row["sai_number"],
                "submitted_type":
                "C",  # ?? For CSV?
            }
            # ?? business_funds_indicator
            # ?? reporting period start/end??

            txn_assistance = TransactionAssistance(**txn_assistance_dict)
            txn_assistance_list.append(txn_assistance)

        # Bulk insert transaction rows
        self.logger.info(
            "Starting Transaction bulk insert ({} records)".format(
                len(txn_list)))
        Transaction.objects.bulk_create(txn_list)
        self.logger.info("Completed Transaction bulk insert")
        # Update txn assistance list with newly-inserted transactions.
        # NOTE(review): this presumably relies on bulk_create having
        # populated primary keys on the objects in txn_list — confirm for
        # the database backend in use.
        award_id_list = []  # we'll need this when updating the awards later on
        for idx, t in enumerate(txn_assistance_list):
            # Pair each assistance record with the transaction at the same
            # list position (both were appended once per CSV row)
            t.transaction = txn_list[idx]
            award_id_list.append(txn_list[idx].award_id)
        # Bulk insert transaction assistance rows
        self.logger.info(
            "Starting TransactionAssistance bulk insert ({} records)".format(
                len(txn_assistance_list)))
        TransactionAssistance.objects.bulk_create(txn_assistance_list)
        self.logger.info("Completed TransactionAssistance bulk insert")

        # Update awards to reflect latest transaction information
        # (note that this can't be done via signals or a save()
        # override in the model itself, because those aren't
        # triggered by a bulk update)
        self.logger.info("Starting Awards update")
        # Only the return value of update_awards is reported in the log below
        count = update_awards(tuple(award_id_list))
        update_contract_awards(tuple(award_id_list))
        update_model_description_fields()
        self.logger.info("Completed Awards update ({} records)".format(count))

示例#8

0

显示文件

文件： load_usaspending_assistance.py 项目： codestaruser/usaspending-api

 def get_awarding_agency(self, row):
     """Resolve the awarding agency for a row.

     The ``maj_agency_cat`` and ``agency_code`` columns are each passed
     through ``h.up2colon`` and handed to ``self.get_agency`` as the
     top-tier and sub-tier codes, in that order.
     """
     return self.get_agency(
         h.up2colon(row['maj_agency_cat']),
         h.up2colon(row['agency_code']),
     )

示例#9

0

显示文件

    def handle(self, *args, **options):
        """Load contract transactions from a CSV file.

        Reads the path in ``options['file'][0]``, builds one ``Transaction``
        and one ``TransactionContract`` per row, bulk-inserts both lists,
        and then runs the award update helpers for the affected award ids.
        """

        csv_file = options['file'][0]
        self.logger.info("Starting load for file {}".format(csv_file))

        # Create the csv reader
        reader = CsvDataReader(csv_file)

        # Create a new submission attributes object for this timestamp
        subattr = SubmissionAttributes()
        subattr.usaspending_update = datetime.now()
        subattr.save()

        # Create lists to hold model instances for bulk insert.
        # The two lists are kept in lockstep: entry i of each belongs to
        # the same CSV row, and they are paired by index further down.
        txn_list = []
        txn_contract_list = []

        # Lookup passed to get_agency_id for each row's agency columns
        subtier_agency_dict = h.get_subtier_agency_dict()

        # Store some additional support data needed for the load:
        # each maps the first element of a pair to its second element
        award_type_dict = {a[0]: a[1] for a in AWARD_TYPES}
        contract_pricing_dict = {c[0]: c[1] for c in CONTRACT_PRICING_TYPES}

        for idx, row in enumerate(reader):
            # NOTE(review): progress is keyed on len(reader), not idx (which
            # is unused in this loop). This only logs periodically if
            # len(reader) changes as rows are consumed — confirm against
            # CsvDataReader.
            if len(reader) % 1000 == 0:
                self.logger.info("Read row {}".format(len(reader)))
            row = h.cleanse_values(row)

            awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

            # Create the transaction object for this row
            # NOTE(review): evaluate_contract_award_type(row) is evaluated
            # twice below, once for "type" and once for "type_description".
            txn_dict = {
                "action_date": h.convert_date(row['signeddate']),
                "award": self.get_or_create_award(row, awarding_agency_id),
                "awarding_agency_id": awarding_agency_id,
                "data_source": "USA",
                "description": row["descriptionofcontractrequirement"],
                "federal_action_obligation": row["dollarsobligated"],
                "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
                "last_modified_date": h.convert_date(row['last_modified_date']),
                "modification_number": row["modnumber"],
                "place_of_performance": h.get_or_create_location(
                    row, mapper=location_mapper_place_of_performance),
                "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
                "period_of_performance_start_date": h.convert_date(row['effectivedate']),
                "recipient": self.get_or_create_recipient(row),
                "submission": subattr,
                "type": evaluate_contract_award_type(row),
                "type_description": award_type_dict.get(evaluate_contract_award_type(row)),
                "usaspending_unique_transaction_id": row["unique_transaction_id"]
            }
            txn = Transaction(**txn_dict)
            txn_list.append(txn)

            # Create the transaction contract object for this row
            # NOTE(review): "naics" below reads 'nationalinterestactioncode',
            # the same column used for "national_interest_action" — looks
            # like a copy-paste slip; confirm the intended source column.
            txn_contract_dict = {
                "submission": subattr,
                "piid": row['piid'],
                "parent_award_id": row['idvpiid'],
                "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
                "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
                "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
                "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
                "gfe_gfp": h.up2colon(row['gfe_gfp']),
                "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
                "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
                "type_of_contract_pricing_description": contract_pricing_dict.get(h.up2colon(row['typeofcontractpricing'])),
                "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
                "naics": h.up2colon(row['nationalinterestactioncode']),
                "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
                "commercial_item_acquisition_procedures": h.up2colon(
                    row['commercialitemacquisitionprocedures']),
                "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
                "consolidated_contract": h.up2colon(row['consolidatedcontract']),
                "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                    row['contingencyhumanitarianpeacekeepingoperation']),
                "contract_bundling": h.up2colon(row['contractbundling']),
                "contract_financing": h.up2colon(row['contractfinancing']),
                "contracting_officers_determination_of_business_size": h.up2colon(
                    row['contractingofficerbusinesssizedetermination']),
                "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
                "davis_bacon_act": h.up2colon(row['davisbaconact']),
                "evaluated_preference": h.up2colon(row['evaluatedpreference']),
                "extent_competed": h.up2colon(row['extentcompeted']),
                "information_technology_commercial_item_category": h.up2colon(
                    row['informationtechnologycommercialitemcategory']),
                "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
                "local_area_set_aside": h.up2colon(row['localareasetaside']),
                "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
                "multi_year_contract": h.up2colon(row['multiyearcontract']),
                "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
                "number_of_actions": h.up2colon(row['numberofactions']),
                "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
                "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
                "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
                "product_or_service_code": h.up2colon(row['productorservicecode']),
                "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
                "research": h.up2colon(row['research']),
                "sea_transportation": h.up2colon(row['seatransportation']),
                "service_contract_act": h.up2colon(row['servicecontractact']),
                "small_business_competitiveness_demonstration_program": self.parse_first_character(
                    row['smallbusinesscompetitivenessdemonstrationprogram']),
                "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
                "subcontracting_plan": h.up2colon(row['subcontractplan']),
                "type_set_aside": h.up2colon(row['typeofsetaside']),
                "walsh_healey_act": h.up2colon(row['walshhealyact']),
                "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
                "type_of_idc": self.parse_first_character(row['typeofidc']),
                "a76_fair_act_action": self.parse_first_character(row['a76action']),
                "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
                "cost_accounting_standards": self.parse_first_character(
                    row['costaccountingstandardsclause']),
                "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
                "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
                "major_program": self.parse_first_character(row['majorprogramcode']),
                "program_acronym": self.parse_first_character(row['programacronym']),
                "referenced_idv_modification_number": self.parse_first_character(
                    row['idvmodificationnumber']),
                "transaction_number": self.parse_first_character(row['transactionnumber']),
                "solicitation_identifier": self.parse_first_character(row['solicitationid'])
            }
            txn_contract = TransactionContract(**txn_contract_dict)
            txn_contract_list.append(txn_contract)

        # Bulk insert transaction rows
        self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
        Transaction.objects.bulk_create(txn_list)
        self.logger.info("Completed Transaction bulk insert")
        # Update txn contract list with newly-inserted transactions.
        # NOTE(review): this presumably relies on bulk_create having
        # populated primary keys on the objects in txn_list — confirm for
        # the database backend in use.
        award_id_list = []  # we'll need this when updating the awards later on
        for idx, t in enumerate(txn_contract_list):
            # add transaction info to this TransactionContract object
            t.transaction = txn_list[idx]
            # add the corresponding award id to a list we'll use when batch-updating award data
            award_id_list.append(txn_list[idx].award_id)
        # Bulk insert transaction contract rows
        self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
        TransactionContract.objects.bulk_create(txn_contract_list)
        self.logger.info("Completed TransactionContract bulk insert")

        # Update awards to reflect latest transaction information
        # (note that this can't be done via signals or a save()
        # override in the model itself, because those aren't
        # triggered by a bulk update)
        self.logger.info("Starting Awards update")
        # Only the return value of update_awards is reported in the log below
        count = update_awards(tuple(award_id_list))
        update_contract_awards(tuple(award_id_list))
        self.logger.info("Completed Awards update ({} records)".format(count))

示例#10

0

显示文件

 def get_agency_id(self, agency_string, subtier_agency_dict):
     """Look up the agency id for a raw agency column value.

     ``agency_string`` is reduced to a code with ``h.up2colon`` and that code
     is looked up in ``subtier_agency_dict``.

     Returns:
         The value stored for the code, or ``None`` when the code is not in
         the dict. A falsy result is also logged as an error.
     """
     agency_code = h.up2colon(agency_string)
     agency_id = subtier_agency_dict.get(agency_code)
     if not agency_id:
         # Format rather than concatenate, so a non-string value (e.g. None)
         # is reported instead of raising TypeError from the log call.
         # Assumes h.up2colon tolerates such values — TODO confirm.
         self.logger.error("Missing agency: {}".format(agency_string))
     return agency_id