Пример #1
0
    def test_auto_fields(self):
        """Verify that a hand-assigned ``institution_id`` reads back unchanged.

        The check is made twice with identical field values: first on a
        record that is saved (and deleted again afterwards), then on a record
        that is never saved.
        """
        field_values = dict(
            as_of_year=2015,
            respondent_id='22-333',
            agency_code='9',
            loan_type=1,
            property_type=1,
            loan_purpose=1,
            owner_occupancy=1,
            loan_amount_000s=55,
            preapproval='1',
            action_taken=1,
            msamd='01234',
            statefp='11',
            countyfp='222',
            census_tract_number='01234',
            applicant_ethnicity='1',
            co_applicant_ethnicity='1',
            applicant_race_1='1',
            co_applicant_race_1='1',
            applicant_sex='1',
            co_applicant_sex='1',
            applicant_income_000s='1000',
            purchaser_type='1',
            rate_spread='0123',
            hoepa_status='1',
            lien_status='1',
            sequence_number='1',
            population='1',
            minority_population='1',
            ffieic_median_family_income='1000',
            tract_to_msamd_income='1000',
            number_of_owner_occupied_units='1',
            number_of_1_to_4_family_units='1',
            application_date_indicator=1,
        )

        # Pass 1: the assigned value is checked after save().
        saved = HMDARecord(**field_values)
        saved.geo_id = '11222333000'
        saved.institution_id = '922-333'
        saved.save()
        self.assertEqual(saved.institution_id, '922-333')
        saved.delete()

        # Pass 2: the same check on an instance that is never saved.
        unsaved = HMDARecord(**field_values)
        unsaved.geo_id = '11222333000'
        unsaved.institution_id = '922-333'
        self.assertEqual(unsaved.institution_id, '922-333')
Пример #2
0
        def records(self, csv_file):
            """Yield an HMDARecord for each accepted row of ``csv_file``.

            Written as a generator because there are too many records to
            instantiate in memory at once.

            For every row an HMDARecord is built from columns 0-44, its
            ``geo_id`` is derived from columns 11-13 (remapped through
            ``errors.in_2010`` when an entry exists) and its
            ``institution_id`` from columns 2 and 1. A record is yielded when
            column 11 is in ``geo_states`` and the geo_id does not contain
            'NA'; when ``filter_hmda`` is set, column 11 must additionally not
            be in ``known_hmda``. ``filter_hmda``, ``known_hmda``,
            ``geo_states``, ``delete_file``, ``errors`` and ``log_info`` are
            taken from the enclosing scope.

            A row that raises while being processed is logged and skipped.
            Once any row has failed, the file is kept even if ``delete_file``
            is set. Otherwise, after the last row, the file is removed when
            ``delete_file`` is set.

            Side effects: updates ``self.total_lines_read`` and, in the
            non-filtered branch, ``self.na_skipped``, ``self.other_skipped``
            and ``self.total_skipped``.
            """
            prevent_delete = False
            i = 0
            inserted_counter = 0
            skipped_counter = 0
            log_info("Processing " + csv_file)

            # The context manager closes the file even when the consumer
            # stops iterating early or an error escapes the loop.
            with open(csv_file, 'r') as datafile:
                for row in reader(datafile):
                    i += 1
                    if i % 25000 == 0:
                        log_info("Records Processed For File " + str(i))

                    try:
                        record = HMDARecord(
                            as_of_year=int(row[0]),
                            respondent_id=row[1],
                            agency_code=row[2],
                            loan_type=int(row[3]),
                            property_type=row[4],
                            loan_purpose=int(row[5]),
                            owner_occupancy=int(row[6]),
                            loan_amount_000s=int(row[7]),
                            preapproval=row[8],
                            action_taken=int(row[9]),
                            msamd=row[10],
                            statefp=row[11],
                            countyfp=row[12],
                            census_tract_number=row[13],
                            applicant_ethnicity=row[14],
                            co_applicant_ethnicity=row[15],
                            applicant_race_1=row[16],
                            applicant_race_2=row[17],
                            applicant_race_3=row[18],
                            applicant_race_4=row[19],
                            applicant_race_5=row[20],
                            co_applicant_race_1=row[21],
                            co_applicant_race_2=row[22],
                            co_applicant_race_3=row[23],
                            co_applicant_race_4=row[24],
                            co_applicant_race_5=row[25],
                            applicant_sex=int(row[26]),
                            co_applicant_sex=int(row[27]),
                            applicant_income_000s=row[28],
                            purchaser_type=row[29],
                            denial_reason_1=row[30],
                            denial_reason_2=row[31],
                            denial_reason_3=row[32],
                            rate_spread=row[33],
                            hoepa_status=row[34],
                            lien_status=row[35],
                            edit_status=row[36],
                            sequence_number=row[37],
                            population=row[38],
                            minority_population=row[39],
                            ffieic_median_family_income=row[40],
                            tract_to_msamd_income=row[41],
                            number_of_owner_occupied_units=row[42],
                            number_of_1_to_4_family_units=row[43],
                            application_date_indicator=row[44])

                        # state + county + tract with the decimal point
                        # dropped; errors.in_2010 supplies a replacement id
                        # when it has an entry for this one.
                        censustract = (row[11] + row[12] +
                                       row[13].replace('.', ''))
                        record.geo_id = errors.in_2010.get(censustract,
                                                           censustract)

                        record.institution_id = row[2] + row[1]

                        self.total_lines_read = self.total_lines_read + 1

                        if filter_hmda:
                            if (row[11] not in known_hmda
                                    and row[11] in geo_states
                                    and 'NA' not in record.geo_id):
                                inserted_counter += 1
                                yield record
                            else:
                                skipped_counter += 1
                        else:
                            if (row[11] in geo_states
                                    and 'NA' not in record.geo_id):
                                inserted_counter += 1
                                yield record
                            else:
                                if 'NA' in record.geo_id:
                                    self.na_skipped = self.na_skipped + 1
                                else:
                                    self.other_skipped = self.other_skipped + 1

                                self.total_skipped = self.total_skipped + 1
                                # Count only rows that were really skipped;
                                # yielded rows must not inflate this total.
                                skipped_counter += 1

                    except Exception:
                        # Deliberately not a bare ``except``: the ``yield``
                        # sits inside this ``try``, so GeneratorExit (raised
                        # when the consumer closes the generator) and
                        # KeyboardInterrupt have to propagate.
                        prevent_delete = True
                        log_info('*****************************')
                        log_info("Error processing csv_file")
                        log_info("Record Line Number " + str(i))
                        log_info("Row: " + str(row))
                        log_info("Unexpected error: " +
                                 str(sys.exc_info()[0]))
                        # format_exc() returns the traceback text;
                        # print_exc() returns None and would log "None".
                        log_info(traceback.format_exc())
                        log_info('*****************************')

            log_info("Finished Processing File: " + str(i))
            log_info("Records That have been yield/Inserted For File: " +
                     str(inserted_counter))
            log_info("Records Skipped For File: " + str(skipped_counter))

            # Reached only after the file is closed and every row was read.
            if delete_file and not prevent_delete:
                os.remove(csv_file)
Пример #3
0
        def records(self, csv_file):
            """Yield an HMDARecord for each accepted row of ``csv_file``.

            Written as a generator because there are too many records to
            instantiate in memory at once.

            For every row an HMDARecord is built from columns 0-44. Its
            ``geo_id`` is the record year followed by the census tract id
            derived from columns 11-13 (remapped through ``errors.in_2010``
            when an entry exists); its ``institution_id`` is the record year
            followed by the agency code and respondent id. A record is yielded
            when column 11 is in ``geo_states`` and the geo_id does not contain
            "NA"; when ``filter_hmda`` is set, column 11 must additionally not
            be in ``known_hmda``. ``filter_hmda``, ``known_hmda``,
            ``geo_states``, ``delete_file``, ``errors`` and ``log_info`` are
            taken from the enclosing scope.

            A row that raises while being processed is logged and skipped.
            Once any row has failed, the file is kept even if ``delete_file``
            is set. Otherwise, after the last row, the file is removed when
            ``delete_file`` is set.

            Side effects: updates ``self.total_lines_read`` and, in the
            non-filtered branch, ``self.na_skipped``, ``self.other_skipped``
            and ``self.total_skipped``.
            """
            prevent_delete = False
            i = 0
            inserted_counter = 0
            skipped_counter = 0
            log_info("Processing " + csv_file)

            # The context manager closes the file even when the consumer
            # stops iterating early or an error escapes the loop.
            with open(csv_file, "r") as datafile:
                for row in reader(datafile):
                    i += 1
                    if i % 25000 == 0:
                        log_info("Records Processed For File " + str(i))

                    try:
                        record = HMDARecord(
                            as_of_year=int(row[0]),
                            respondent_id=row[1],
                            agency_code=row[2],
                            loan_type=int(row[3]),
                            property_type=row[4],
                            loan_purpose=int(row[5]),
                            owner_occupancy=int(row[6]),
                            loan_amount_000s=int(row[7]),
                            preapproval=row[8],
                            action_taken=int(row[9]),
                            msamd=row[10],
                            statefp=row[11],
                            countyfp=row[12],
                            census_tract_number=row[13],
                            applicant_ethnicity=row[14],
                            co_applicant_ethnicity=row[15],
                            applicant_race_1=row[16],
                            applicant_race_2=row[17],
                            applicant_race_3=row[18],
                            applicant_race_4=row[19],
                            applicant_race_5=row[20],
                            co_applicant_race_1=row[21],
                            co_applicant_race_2=row[22],
                            co_applicant_race_3=row[23],
                            co_applicant_race_4=row[24],
                            co_applicant_race_5=row[25],
                            applicant_sex=int(row[26]),
                            co_applicant_sex=int(row[27]),
                            applicant_income_000s=row[28],
                            purchaser_type=row[29],
                            denial_reason_1=row[30],
                            denial_reason_2=row[31],
                            denial_reason_3=row[32],
                            rate_spread=row[33],
                            hoepa_status=row[34],
                            lien_status=row[35],
                            edit_status=row[36],
                            sequence_number=row[37],
                            population=row[38],
                            minority_population=row[39],
                            ffieic_median_family_income=row[40],
                            tract_to_msamd_income=row[41],
                            number_of_owner_occupied_units=row[42],
                            number_of_1_to_4_family_units=row[43],
                            application_date_indicator=row[44],
                        )

                        # state + county + tract with the decimal point
                        # dropped; errors.in_2010 supplies a replacement id
                        # when it has an entry for this one.
                        censustract = row[11] + row[12] + row[13].replace(".", "")
                        censustract = errors.in_2010.get(censustract, censustract)
                        record.geo_id = str(record.as_of_year) + censustract

                        record.institution_id = (
                            str(record.as_of_year) + record.agency_code + record.respondent_id
                        )

                        self.total_lines_read = self.total_lines_read + 1

                        if filter_hmda:
                            if (
                                row[11] not in known_hmda
                                and row[11] in geo_states
                                and "NA" not in record.geo_id
                            ):
                                inserted_counter += 1
                                yield record
                            else:
                                skipped_counter += 1
                        else:
                            if row[11] in geo_states and "NA" not in record.geo_id:
                                inserted_counter += 1
                                yield record
                            else:
                                if "NA" in record.geo_id:
                                    self.na_skipped = self.na_skipped + 1
                                else:
                                    self.other_skipped = self.other_skipped + 1

                                self.total_skipped = self.total_skipped + 1
                                # Count only rows that were really skipped;
                                # yielded rows must not inflate this total.
                                skipped_counter += 1

                    except Exception:
                        # Deliberately not a bare ``except``: the ``yield``
                        # sits inside this ``try``, so GeneratorExit (raised
                        # when the consumer closes the generator) and
                        # KeyboardInterrupt have to propagate.
                        prevent_delete = True
                        log_info("*****************************")
                        log_info("Error processing csv_file")
                        log_info("Record Line Number " + str(i))
                        log_info("Row: " + str(row))
                        log_info("Unexpected error: " + str(sys.exc_info()[0]))
                        # format_exc() returns the traceback text;
                        # print_exc() returns None and would log "None".
                        log_info(traceback.format_exc())
                        log_info("*****************************")

            log_info("Finished Processing File: " + str(i))
            log_info("Records That have been yield/Inserted For File: " + str(inserted_counter))
            log_info("Records Skipped For File: " + str(skipped_counter))

            # Reached only after the file is closed and every row was read.
            if delete_file and not prevent_delete:
                os.remove(csv_file)
Пример #4
0
    def test_auto_fields(self):
        """Assert that ``institution_id`` set by hand is returned unchanged.

        Two passes use the same constructor arguments. The first pass saves
        the record before asserting and deletes it afterwards; the second
        pass asserts on a record that is never saved.
        """
        constructor_args = {
            'as_of_year': 2015,
            'respondent_id': '22-333',
            'agency_code': '9',
            'loan_type': 1,
            'property_type': 1,
            'loan_purpose': 1,
            'owner_occupancy': 1,
            'loan_amount_000s': 55,
            'preapproval': '1',
            'action_taken': 1,
            'msamd': '01234',
            'statefp': '11',
            'countyfp': '222',
            'census_tract_number': '01234',
            'applicant_ethnicity': '1',
            'co_applicant_ethnicity': '1',
            'applicant_race_1': '1',
            'co_applicant_race_1': '1',
            'applicant_sex': '1',
            'co_applicant_sex': '1',
            'applicant_income_000s': '1000',
            'purchaser_type': '1',
            'rate_spread': '0123',
            'hoepa_status': '1',
            'lien_status': '1',
            'sequence_number': '1',
            'population': '1',
            'minority_population': '1',
            'ffieic_median_family_income': '1000',
            'tract_to_msamd_income': '1000',
            'number_of_owner_occupied_units': '1',
            'number_of_1_to_4_family_units': '1',
            'application_date_indicator': 1,
        }
        expected_institution_id = '922-333'

        # First pass persists the record around the assertion; the second
        # pass leaves it unsaved.
        for persist in (True, False):
            record = HMDARecord(**constructor_args)
            record.geo_id = '11222333000'
            record.institution_id = expected_institution_id
            if persist:
                record.save()
            self.assertEqual(record.institution_id, expected_institution_id)
            if persist:
                record.delete()