def load_values(return_fips=False):
    """
    Drop and reload the CountyMortgageData table, or return a FIPS list.

    This is not used in the data pipeline and is mainly for local testing.
    Passing ``return_fips=True`` returns a sorted list of the distinct,
    validated FIPS values found in the source CSV without touching the table.

    The script assumes that `starting_date` and `through_date` have been
    set in constants (MortgageDataConstant records).
    """
    counter = 0
    source_url = "{}/{}".format(S3_SOURCE_BUCKET, S3_SOURCE_FILE)
    starting_date = MortgageDataConstant.objects.get(
        name='starting_date').date_value
    through_date = MortgageDataConstant.objects.get(
        name='through_date').date_value
    # raw_data is a generator delivering data dicts, each representing a row
    raw_data = read_in_s3_csv(source_url)
    if return_fips is True:
        fips_list = [validate_fips(row.get('fips')) for row in raw_data]
        return sorted(set(fips_list))
    logger.info("Deleting CountyMortgageData objects.")
    CountyMortgageData.objects.all().delete()
    # Typo fix: log message previously read "CountyMorgtgageData".
    logger.info("CountyMortgageData count is now {}".format(
        CountyMortgageData.objects.count()))
    for row in raw_data:
        sampling_date = parser.parse(row.get('date')).date()
        # Only load rows that fall inside the configured date window.
        if starting_date <= sampling_date <= through_date:
            valid_fips = validate_fips(row.get('fips'))
            if valid_fips:
                county = County.objects.get(fips=valid_fips)
                obj = CountyMortgageData(
                    fips=valid_fips,
                    county=county,
                    date=sampling_date,
                    total=int(row.get('open')),
                    current=int(row.get('current')),
                    thirty=int(row.get('thirty')),
                    sixty=int(row.get('sixty')),
                    ninety=int(row.get('ninety')),
                    other=int(row.get('other')))
                obj.save()
                counter += 1
                # Lightweight progress indicators for long-running loads.
                if counter % 10000 == 0:  # pragma: no cover
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if counter % 100000 == 0:  # pragma: no cover
                    logger.info("\n{}".format(counter))
    logger.info("\nCreated {} CountyMortgageData objects".format(
        CountyMortgageData.objects.count()))
def load_values(return_fips=False):
    """
    Drop and reload the CountyMortgageData table, or just return a FIPS list.

    This is not used in the data pipeline and is mainly for local testing.
    Passing `return_fips=True` will return a sorted list of source FIPS
    values. The script assumes that `starting_date` and `through_date`
    have been set in constants.
    """
    counter = 0
    source_url = "{}/{}".format(S3_SOURCE_BUCKET, S3_SOURCE_FILE)
    starting_date = MortgageDataConstant.objects.get(
        name='starting_date').date_value
    through_date = MortgageDataConstant.objects.get(
        name='through_date').date_value
    # raw_data is a generator delivering data dicts, each representing a row
    raw_data = read_in_s3_csv(source_url)
    if return_fips is True:
        fips_list = [validate_fips(row.get('fips')) for row in raw_data]
        return sorted(set(fips_list))
    logger.info("Deleting CountyMortgageData objects.")
    CountyMortgageData.objects.all().delete()
    # Typo fix: log message previously read "CountyMorgtgageData".
    logger.info("CountyMortgageData count is now {}".format(
        CountyMortgageData.objects.count()))
    for row in raw_data:
        sampling_date = parser.parse(row.get('date')).date()
        if sampling_date >= starting_date and sampling_date <= through_date:
            valid_fips = validate_fips(row.get('fips'))
            if valid_fips:
                county = County.objects.get(fips=valid_fips)
                obj = CountyMortgageData(
                    fips=valid_fips,
                    county=county,
                    date=sampling_date,
                    total=int(row.get('open')),
                    current=int(row.get('current')),
                    thirty=int(row.get('thirty')),
                    sixty=int(row.get('sixty')),
                    ninety=int(row.get('ninety')),
                    other=int(row.get('other')))
                obj.save()
                counter += 1
                # Progress markers so long local loads show signs of life.
                if counter % 10000 == 0:  # pragma: no cover
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if counter % 100000 == 0:  # pragma: no cover
                    logger.info("\n{}".format(counter))
    logger.info("\nCreated {} CountyMortgageData objects".format(
        CountyMortgageData.objects.count()))
def create_dump(
        starting_date, through_date, dump_slug, sql=True):
    """
    Dump in-window source rows to a SQL (default) or CSV file.

    Sample input CSV field_names and row:
    date,fips,open,current,thirty,sixty,ninety,other
    01/01/08,1001,268,260,4,1,0,3

    Default is to dump SQL for mysql loading. Alternative is to dump CSV.
    CSV is portable and less brittle, but our mysql setup doesn't allow it.
    If we switch to Postgres, we can make CSV the default.
    """
    starter = datetime.datetime.now()
    counter = 0
    pk = 1  # synthetic primary key assigned in row order
    rows_out = []
    source_url = "{}/{}".format(S3_SOURCE_BUCKET, S3_SOURCE_FILE)
    raw_data = read_in_s3_csv(source_url)
    for row in raw_data:
        sampling_date = parser.parse(row.get('date')).date()
        # Keep only rows inside the [starting_date, through_date] window.
        if starting_date <= sampling_date <= through_date:
            valid_fips = validate_fips(row.get('fips'))
            if valid_fips:
                county_pk = County.objects.get(fips=valid_fips).pk
                rows_out.append([
                    pk,
                    valid_fips,
                    "{}".format(sampling_date),
                    row.get('open'),
                    row.get('current'),
                    row.get('thirty'),
                    row.get('sixty'),
                    row.get('ninety'),
                    row.get('other'),
                    county_pk])
                pk += 1
                counter += 1
                # Lightweight progress indicators for long dumps.
                if counter % 10000 == 0:  # pragma: no cover
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if counter % 100000 == 0:  # pragma: no cover
                    logger.info("\n{}".format(counter))
    if sql is True:
        dump_as_sql(rows_out, dump_slug)
    else:
        dump_as_csv(rows_out, dump_slug)
    # Typo fix: log message previously read "ceate_dump".
    logger.info('\ncreate_dump took {} to create a file with {} rows'.format(
        (datetime.datetime.now() - starter), len(rows_out)))
def test_validate_fips_too_long(self):
    # A six-digit string is longer than any FIPS code and is rejected.
    self.assertEqual(validate_fips('123456'), None)
def test_validate_fips_too_short(self):
    # Two digits is too short for a county FIPS code; expect rejection.
    self.assertEqual(validate_fips('12'), None)
def test_validate_fips_keep_outdated(self):
    # With keep_outdated=True, a normally excluded outdated FIPS code
    # is passed through unchanged.
    result = validate_fips('02201', keep_outdated=True)
    self.assertEqual(result, '02201')
def test_validate_fips_outdated_fips(self):
    # An outdated FIPS code is excluded by default and returns None.
    self.assertIs(validate_fips('02201'), None)
def test_validate_fips_outdated_fips(self):
    # By default an outdated FIPS code ('02201') is filtered out.
    self.assertIs(validate_fips('02201'), None)
def test_validate_fips_invalid_5_digit(self):
    # A five-digit code that is not a currently valid FIPS is rejected.
    self.assertEqual(validate_fips('02201'), None)
def test_validate_fips_edge_case(self):
    # A renumbered FIPS code should be mapped to its replacement code.
    self.assertEqual(validate_fips('46113'), '46102')
def test_validate_fips_too_long(self):
    # Inputs longer than five digits cannot be FIPS codes.
    self.assertEqual(validate_fips('123456'), None)
def test_validate_fips_too_short(self):
    # Inputs shorter than four digits cannot be FIPS codes.
    self.assertEqual(validate_fips('12'), None)
def process_source(
        starting_date, through_date, dump_slug=None):
    """
    Re-generate aggregated data from the latest source CSV posted to S3.

    This operation has the following steps:
    - Wipe and regenerate the base county_mortgage_data table.
    - Regenerate aggregated data for MSAs, non-MSAs, states and national.
    - Update metadata values and files.
    - Export new downloadable public CSV files.

    If dump_slug is provided, a CSV of the base county tables will be
    dumped.

    The input CSV has the following field_names and row form:
    date,fips,open,current,thirty,sixty,ninety,other
    01/01/08,1001,268,260,4,1,0,3
    """
    starter = datetime.datetime.now()
    counter = 0
    pk = 1  # assign primary keys manually so bulk_create rows are ordered
    new_objects = []
    # truncate table
    CountyMortgageData.objects.all().delete()
    source_url = "{}/{}".format(S3_SOURCE_BUCKET, S3_SOURCE_FILE)
    raw_data = read_in_s3_csv(source_url)
    for row in raw_data:
        sampling_date = parser.parse(row.get('date')).date()
        if starting_date <= sampling_date <= through_date:
            valid_fips = validate_fips(row.get('fips'))
            if valid_fips:
                county = County.objects.get(fips=valid_fips)
                new_objects.append(
                    CountyMortgageData(
                        pk=pk,
                        fips=valid_fips,
                        date=sampling_date,
                        total=row.get('open'),
                        current=row.get('current'),
                        thirty=row.get('thirty'),
                        sixty=row.get('sixty'),
                        ninety=row.get('ninety'),
                        other=row.get('other'),
                        county=county))
                pk += 1
                counter += 1
                # Lightweight progress indicators for long runs.
                if counter % 10000 == 0:  # pragma: no cover
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if counter % 100000 == 0:  # pragma: no cover
                    logger.info("\n{}".format(counter))
    CountyMortgageData.objects.bulk_create(new_objects)
    logger.info('\n{} took {} '
                'to create {} countymortgage records'.format(
                    SCRIPT_NAME,
                    (datetime.datetime.now() - starter),
                    len(new_objects)))
    if dump_slug:
        rows = []
        for obj in new_objects:
            rows.append([
                obj.pk,
                obj.fips,
                "{}".format(obj.date),
                obj.total,
                obj.current,
                obj.thirty,
                obj.sixty,
                obj.ninety,
                obj.other,
                # Bug fix: previously used the stale loop variable
                # `county.pk` (last county processed) for every row.
                obj.county.pk,
            ])
        dump_as_csv(rows, dump_slug)
def process_source(starting_date, through_date, dump_slug=None):
    """
    Re-generate aggregated data from the latest source CSV posted to S3.

    This operation has the following steps:
    - Wipe and regenerate the base county_mortgage_data table.
    - Regenerate aggregated data for MSAs, non-MSAs, states and national.
    - Update metadata values and files.
    - Export new downloadable public CSV files.

    If dump_slug is provided, a CSV of the base county tables will be
    dumped.

    The input CSV has the following field_names and row form:
    date,fips,open,current,thirty,sixty,ninety,other
    01/01/08,1001,268,260,4,1,0,3
    """
    starter = datetime.datetime.now()
    counter = 0
    # Primary keys are assigned manually so rows created via bulk_create
    # get sequential pks in source-row order.
    pk = 1
    new_objects = []
    # truncate table
    CountyMortgageData.objects.all().delete()
    source_url = "{}/{}".format(S3_SOURCE_BUCKET, S3_SOURCE_FILE)
    raw_data = read_in_s3_csv(source_url)
    for row in raw_data:
        sampling_date = parser.parse(row.get('date')).date()
        # Only rows inside the [starting_date, through_date] window and
        # with a FIPS value that passes validation are loaded.
        if sampling_date >= starting_date and sampling_date <= through_date:
            valid_fips = validate_fips(row.get('fips'))
            if valid_fips:
                county = County.objects.get(fips=valid_fips)
                new_objects.append(
                    CountyMortgageData(pk=pk,
                                       fips=valid_fips,
                                       date=sampling_date,
                                       total=row.get('open'),
                                       current=row.get('current'),
                                       thirty=row.get('thirty'),
                                       sixty=row.get('sixty'),
                                       ninety=row.get('ninety'),
                                       other=row.get('other'),
                                       county=county))
                pk += 1
                counter += 1
                # Progress indicators so long runs show signs of life.
                if counter % 10000 == 0:  # pragma: no cover
                    sys.stdout.write('.')
                    sys.stdout.flush()
                if counter % 100000 == 0:  # pragma: no cover
                    logger.info("\n{}".format(counter))
    # Single bulk insert of all accepted rows.
    CountyMortgageData.objects.bulk_create(new_objects)
    logger.info('\n{} took {} '
                'to create {} countymortgage records'.format(
                    SCRIPT_NAME,
                    (datetime.datetime.now() - starter),
                    len(new_objects)))
    if dump_slug:
        # Stream rows to the CSV dumper via a generator expression;
        # nothing extra is materialized in memory.
        dump_as_csv(((
            obj.pk,
            obj.fips,
            "{}".format(obj.date),
            obj.total,
            obj.current,
            obj.thirty,
            obj.sixty,
            obj.ninety,
            obj.other,
            obj.county.pk,
        ) for obj in new_objects), dump_slug)
def test_validate_fips_edge_case(self):
    # The old code 46113 should be translated to its successor, 46102.
    self.assertEqual(validate_fips('46113'), '46102')
def test_validate_fips_4_digit(self):
    # A four-digit code gets its leading zero restored.
    self.assertEqual(validate_fips('1015'), '01015')
def test_validate_fips_4_digit(self):
    # Four-digit input is zero-padded to a five-digit FIPS code.
    self.assertEqual(validate_fips('1015'), '01015')
def test_validate_fips_valid_5_digit(self):
    # A valid five-digit FIPS code is returned unchanged.
    self.assertEqual(validate_fips('34041'), '34041')
def test_validate_fips_invalid_5_digit(self):
    # Five digits alone are not enough; the code must be currently valid.
    self.assertEqual(validate_fips('02201'), None)
def test_validate_fips_keep_outdated(self):
    # keep_outdated=True lets a normally excluded outdated code through.
    self.assertEqual(
        validate_fips('02201', keep_outdated=True), '02201')
def test_validate_fips_valid_5_digit(self):
    # An already-valid five-digit code passes through untouched.
    self.assertEqual(validate_fips('34041'), '34041')