def getData(self, gcs_bucket=None):
    """Load every poverty table and pass it to ``accumulate_acs_data``.

    One table is processed for each entry of
    ``POVERTY_BY_RACE_SEX_AGE_GROUPS``, first with ``is_county=True`` and
    then with ``is_county=False``.

    Args:
        gcs_bucket: Bucket to read previously stored JSON values from.
            When ``None``, the data is requested through
            ``get_acs_data_from_variables`` using ``self.base_url``.
    """
    fetch_remote = gcs_bucket is None
    for group in POVERTY_BY_RACE_SEX_AGE_GROUPS:
        for is_county in (True, False):
            if fetch_remote:
                table = get_acs_data_from_variables(
                    self.base_url, get_params(group, is_county)
                )
            else:
                table = gcs_to_bq_util.load_values_as_json(
                    gcs_bucket, get_filename(group, is_county)
                )
            self.accumulate_acs_data(table)
def upload_to_gcs(self, bucket):
    """Copy each poverty table from ``self.base_url`` into ``bucket``.

    ``url_file_to_gcs.url_file_to_gcs`` is invoked once per
    (group, is_county) combination; every call is always made, regardless
    of the results of earlier ones.

    Args:
        bucket: Destination bucket handed to ``url_file_to_gcs``.

    Returns:
        The most recent truthy result returned by ``url_file_to_gcs``, or
        ``False`` when no call returned a truthy value.
    """
    any_changed = False
    for group in POVERTY_BY_RACE_SEX_AGE_GROUPS:
        for is_county in (True, False):
            changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url,
                get_params(group, is_county),
                bucket,
                get_filename(group, is_county),
            )
            # Remember a reported difference; never reset it afterwards.
            if changed:
                any_changed = changed
    return any_changed
def upload_to_gcs(self, bucket):
    """Copy the health-insurance-by-sex tables into ``bucket``.

    Runs once with ``is_county=True`` and once with ``is_county=False``;
    both uploads are always attempted.

    Args:
        bucket: Destination bucket handed to ``url_file_to_gcs``.

    Returns:
        The most recent truthy result returned by ``url_file_to_gcs``, or
        ``False`` when neither call returned a truthy value.
    """
    any_changed = False
    for is_county in (True, False):
        query = get_params(HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX, is_county)
        changed = url_file_to_gcs.url_file_to_gcs(
            self.base_url, query, bucket, self.get_filename(is_county)
        )
        # Remember a reported difference; never reset it afterwards.
        if changed:
            any_changed = changed
    return any_changed
def getData(self, gcs_bucket=None):
    """Load the health-insurance-by-sex tables and accumulate them.

    Runs once with ``is_county=True`` and once with ``is_county=False``,
    passing each loaded table to ``accumulate_acs_data``.

    Args:
        gcs_bucket: Bucket to read previously stored JSON blobs from.
            When ``None``, the data is requested directly through
            ``get_acs_data_from_variables`` (useful for local debugging).
    """
    for is_county in [True, False]:
        if gcs_bucket is not None:
            # Load JSON blobs from GCS.
            data = gcs_to_bq_util.load_values_as_json(
                gcs_bucket, self.get_filename(is_county)
            )
        else:
            # Load data from ACS (useful for local debug).
            # The geography flag must follow the loop variable: passing a
            # constant False here would request the non-county table twice
            # and never load the county one.
            data = get_acs_data_from_variables(
                self.base_url,
                get_params(HEALTH_INSURANCE_BY_SEX_GROUPS_PREFIX, is_county),
            )

        # Aggregate and accumulate data in memory.
        self.accumulate_acs_data(data)
def getData(self, gcs_bucket=None):
    """Load the health-insurance-by-race tables and accumulate them.

    Each table is loaded with ``is_county=True`` and then
    ``is_county=False`` and handed to ``accumulate_acs_data``.

    Args:
        gcs_bucket: Bucket holding cached JSON. When given, one file per
            value of ``HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES`` is read
            from it. When ``None``, data is requested through
            ``get_acs_data_from_variables`` once per key of that mapping.
    """
    geographies = (True, False)

    if gcs_bucket is None:
        # No bucket supplied: request the data using each prefix key.
        for prefix in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES:
            for is_county in geographies:
                table = get_acs_data_from_variables(
                    self.base_url, get_params(prefix, is_county)
                )
                self.accumulate_acs_data(table)
        return

    # Bucket supplied: read the cached files, which are named by race.
    for race in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES.values():
        for is_county in geographies:
            table = gcs_to_bq_util.load_values_as_json(
                gcs_bucket, self.get_filename(race, is_county)
            )
            self.accumulate_acs_data(table)
def upload_to_gcs(self, bucket):
    """Copy the health-insurance-by-race tables into ``bucket``.

    For every (prefix, race) pair in
    ``HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES``, with ``is_county=True``
    and then ``is_county=False``, the request parameters are built from
    the prefix and the destination filename from the race. Every upload
    is always attempted.

    Args:
        bucket: Destination bucket handed to ``url_file_to_gcs``.

    Returns:
        The most recent truthy result returned by ``url_file_to_gcs``, or
        ``False`` when no call returned a truthy value.
    """
    any_changed = False
    for prefix_key, race in HEALTH_INSURANCE_BY_RACE_GROUP_PREFIXES.items():
        for is_county in (True, False):
            query = get_params(prefix_key, is_county)
            changed = url_file_to_gcs.url_file_to_gcs(
                self.base_url,
                query,
                bucket,
                self.get_filename(race, is_county),
            )
            # Remember a reported difference; never reset it afterwards.
            if changed:
                any_changed = changed
    return any_changed