def upload_to_gcs(self, gcs_bucket):
        """Fetch ACS population tables and store each one in the GCS bucket.

        One request is issued per concept (every key of
        SEX_BY_AGE_CONCEPTS_TO_RACE followed by HISPANIC_BY_RACE_CONCEPT), and
        a final request fetches TOTAL_POP_VARIABLE_ID on its own.

        Parameters:
            gcs_bucket: Bucket passed through to url_file_to_gcs for each
                        upload.

        Returns: A truthy value if any url_file_to_gcs call returned one
                 (presumably meaning that file changed); otherwise the result
                 of the final upload.
        """
        acs_metadata = fetch_acs_metadata(self.base_acs_url)
        var_map = parse_acs_metadata(acs_metadata, list(GROUPS.keys()))

        all_concepts = [
            *SEX_BY_AGE_CONCEPTS_TO_RACE.keys(),
            HISPANIC_BY_RACE_CONCEPT,
        ]

        file_diff = False
        for concept in all_concepts:
            columns = list(get_vars_for_group(concept, var_map, 2).keys())
            params = get_census_params(columns, self.county_level)
            changed = url_file_to_gcs.url_file_to_gcs(
                self.base_acs_url,
                params,
                gcs_bucket,
                self.get_filename(concept),
            )
            # Every upload always runs; only the flag is sticky once truthy.
            if not file_diff:
                file_diff = changed

        total_params = get_census_params(
            [TOTAL_POP_VARIABLE_ID], self.county_level)
        total_changed = url_file_to_gcs.url_file_to_gcs(
            self.base_acs_url,
            total_params,
            gcs_bucket,
            self.add_filename_suffix(TOTAL_POP_VARIABLE_ID),
        )
        return file_diff or total_changed
Пример #2
0
    def upload_to_gcs(self, bucket):
        """Upload one file per poverty group and geography flag.

        Each entry of POVERTY_BY_RACE_SEX_AGE_GROUPS is requested twice, with
        is_county set to True and then to False.

        Parameters:
            bucket: Bucket passed through to url_file_to_gcs.

        Returns: False if no upload returned a truthy value; otherwise the
                 most recent truthy value returned by url_file_to_gcs.
        """
        combos = (
            (group, is_county)
            for group in POVERTY_BY_RACE_SEX_AGE_GROUPS
            for is_county in (True, False)
        )

        file_diff = False
        for group, is_county in combos:
            changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url,
                get_params(group, is_county),
                bucket,
                get_filename(group, is_county),
            )
            if changed:
                file_diff = changed

        return file_diff
Пример #3
0
 def upload_to_gcs(self, url, gcs_bucket, filename):
     """Uploads household income data from SAIPE to GCS bucket, one file per year.

     The years requested are fixed in this method: 1989, 1993 and every year
     from 1995 through 2018. Each year is written to '<filename>_<year>.json'.

     Parameters:
         url: Address passed through to url_file_to_gcs.
         gcs_bucket: Bucket passed through to url_file_to_gcs.
         filename: Prefix for the per-year file names.

     Returns: A truthy value if any per-year upload returned one; otherwise
              the result of the last upload.
     """
     # An ordered tuple rather than a set, so requests are issued in
     # chronological order instead of relying on set iteration order.
     years = (1989, 1993, *range(1995, 2019))

     # The requested columns do not depend on the year; look them up once.
     columns = self.get_household_income_columns().keys()

     file_diff = False
     for year in years:
         # Build a fresh params dict per year because 'time' is written into it.
         url_params = census.get_census_params_by_county(columns)
         url_params['time'] = year
         next_file_diff = url_file_to_gcs.url_file_to_gcs(
             url, url_params, gcs_bucket, '{}_{}.json'.format(filename, year))
         file_diff = file_diff or next_file_diff
     return file_diff
Пример #4
0
    def upload_to_gcs(self, bucket):
        """Upload the health-insurance-by-sex data for both geography flags.

        Two requests are made for HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX, with
        is_county set to True and then to False.

        Parameters:
            bucket: Bucket passed through to url_file_to_gcs.

        Returns: False if neither upload returned a truthy value; otherwise
                 the most recent truthy value returned by url_file_to_gcs.
        """
        file_diff = False
        for is_county in (True, False):
            request_params = get_params(
                HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX, is_county)
            changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url,
                request_params,
                bucket,
                self.get_filename(is_county),
            )
            if changed:
                file_diff = changed
        return file_diff
Пример #5
0
    def upload_to_gcs(self, gcs_bucket, **attrs):
        """
        Download the file named by the message attributes and store it as a
        blob in the given GCS bucket.

        Parameters:
            gcs_bucket: Name of the GCS bucket to upload to (without gs://).
            attrs: Additional message attributes. The 'url' and 'filename'
                   entries are read from them through get_attr.

        Returns: A boolean indication of a file diff, as reported by
                 url_file_to_gcs for this single file.
        """
        source_url = self.get_attr(attrs, 'url')
        blob_name = self.get_attr(attrs, 'filename')
        # No query parameters are sent for this source, hence the None.
        return url_file_to_gcs.url_file_to_gcs(
            source_url, None, gcs_bucket, blob_name)
    def upload_to_gcs(self, bucket):
        """Upload health insurance data split by sex and by race.

        Four sex-based requests run first (male then female without the
        county flag, then male and female with it), followed by two requests
        per key of each entry in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES.

        Parameters:
            bucket: Bucket passed through to url_file_to_gcs.

        Returns: The most recent truthy value returned by url_file_to_gcs,
                 or the result of the very first upload when none was truthy.
        """
        def push(params, target_name):
            # Single place that performs the actual fetch-and-upload call.
            return url_file_to_gcs.url_file_to_gcs(
                self.base_url, params, bucket, target_name)

        sex_prefix = HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX

        # The first upload seeds the flag directly with its result.
        file_diff = push(
            format_params(sex_prefix, HEALTH_INSURANCE_BY_SEX_MALE_SUFFIXES),
            self.get_filename(Sex.MALE, None, False))

        # (format_params arguments, get_filename arguments) for the rest of
        # the sex-based requests, in the order they must be issued.
        remaining_sex_requests = (
            ((sex_prefix, HEALTH_INSURANCE_BY_SEX_FEMALE_SUFFIXES),
             (Sex.FEMALE, None, False)),
            ((sex_prefix, HEALTH_INSURANCE_BY_SEX_MALE_SUFFIXES, True),
             (Sex.MALE, None, True)),
            ((sex_prefix, HEALTH_INSURANCE_BY_SEX_FEMALE_SUFFIXES, True),
             (Sex.FEMALE, None, True)),
        )
        for param_args, name_args in remaining_sex_requests:
            changed = push(
                format_params(*param_args), self.get_filename(*name_args))
            if changed:
                file_diff = changed

        # Walk the race ACS variable groups: each key of a prefix entry
        # supplies the race used in the file name.
        # NOTE(review): the request params depend only on `prefix`, not on
        # `prefix_key`, so an entry with several keys repeats the same
        # request under different file names - confirm this is intended.
        for prefix in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES:
            for prefix_key in prefix:
                state_params = format_params(
                    prefix, HEALTH_INSURANCE_BY_RACE_GROUP_SUFFIXES)
                race = prefix[prefix_key][MetadataKey.RACE]
                state_changed = push(
                    state_params, self.get_filename(None, race, False))
                if state_changed:
                    file_diff = state_changed

                county_params = format_params(
                    prefix, HEALTH_INSURANCE_BY_RACE_GROUP_SUFFIXES, True)
                county_changed = push(
                    county_params, self.get_filename(None, race, True))
                if county_changed:
                    file_diff = county_changed

        return file_diff
Пример #7
0
    def upload_to_gcs(self, bucket):
        """Upload the health-insurance-by-race data for both geography flags.

        For every key of HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES, the mapped
        race value is used for the file name and two requests are made, with
        is_county set to True and then to False.

        Parameters:
            bucket: Bucket passed through to url_file_to_gcs.

        Returns: False if no upload returned a truthy value; otherwise the
                 most recent truthy value returned by url_file_to_gcs.
        """
        file_diff = False
        for prefix_key, race in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES.items():
            for is_county in (True, False):
                request_params = get_params(prefix_key, is_county)
                changed = url_file_to_gcs.url_file_to_gcs(
                    self.base_url,
                    request_params,
                    bucket,
                    self.get_filename(race, is_county),
                )
                if changed:
                    file_diff = changed

        return file_diff
Пример #8
0
 def upload_to_gcs(self, url, gcs_bucket, filename):
     """Upload state names and FIPS codes from the census to the GCS bucket.

     Sends a request for the NAME field with 'for' set to 'state:*'.

     Parameters:
         url: Address passed through to url_file_to_gcs.
         gcs_bucket: Bucket passed through to url_file_to_gcs.
         filename: Name passed through to url_file_to_gcs.

     Returns: Whatever url_file_to_gcs returns for this upload.
     """
     return url_file_to_gcs.url_file_to_gcs(
         url,
         {'get': 'NAME', 'for': 'state:*'},
         gcs_bucket,
         filename,
     )
Пример #9
0
 def upload_to_gcs(self, url, gcs_bucket, filename):
     """Upload county names and FIPS codes from the census to the GCS bucket.

     The query parameters come from census.get_census_params_by_county,
     called with the single column 'NAME'.

     Parameters:
         url: Address passed through to url_file_to_gcs.
         gcs_bucket: Bucket passed through to url_file_to_gcs.
         filename: Name passed through to url_file_to_gcs.

     Returns: Whatever url_file_to_gcs returns for this upload.
     """
     county_name_params = census.get_census_params_by_county(['NAME'])
     return url_file_to_gcs.url_file_to_gcs(
         url, county_name_params, gcs_bucket, filename)