def setup_method(self):
    if os.environ.get('APIKEY') and os.environ.get('USERNAME'):
        self.apikey = os.environ['APIKEY']
        self.username = os.environ['USERNAME']
    else:
        creds = json.loads(open('tests/e2e/secret.json').read())
        self.apikey = creds['APIKEY']
        self.username = creds['USERNAME']

    self.credentials = Credentials(self.username, self.apikey)
    self.enrichment = Enrichment(self.credentials)

    self.points_gdf = read_file(file_path('files/points.geojson'))
    self.polygons_gdf = read_file(file_path('files/polygon.geojson'))

    # from carto-do-public-data.usa_acs.demographics_sociodemographics_usa_censustract_2015_5yrs_20132017
    self.public_variable1 = public_variable1
    self.public_variable2 = public_variable2
    self.public_variable3 = public_variable3
    self.public_variables = [
        self.public_variable1,
        self.public_variable2,
        self.public_variable3
    ]

    # from carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2019
    self.private_variable1 = private_variable1
    self.private_variable2 = private_variable2
    self.private_variable3 = private_variable3
    self.private_variables = [
        self.private_variable1,
        self.private_variable2,
        self.private_variable3
    ]
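# NOTE (assumption): this excerpt omits the module-level imports and fixtures the tests
# depend on. A minimal sketch of what they would look like; the local helpers
# (`file_path`, `clean_gdf`, the `public_variable*`/`private_variable*` fixtures) are
# assumed to live alongside the tests rather than taken from the project:
#
#     import os
#     import json
#
#     from geopandas import read_file
#
#     from cartoframes.auth import Credentials
#     from cartoframes.data.observatory import Enrichment, Variable
#
# The `geography_get_mock`/`dataset_get_mock` arguments in the unit tests below are
# presumably injected by `unittest.mock.patch` decorators that are not shown here.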
def test_enrichment_query_by_points_two_variables_different_datasets(
        self, geography_get_mock, dataset_get_mock):
    enrichment = Enrichment(credentials=self.credentials)

    temp_table_name = 'test_table'
    project = 'project'
    dataset1 = 'dataset1'
    dataset2 = 'dataset2'
    table1 = 'table1'
    table2 = 'table2'
    variable1_name = 'variable1'
    variable2_name = 'variable2'
    column1 = 'column1'
    column2 = 'column2'
    geo_table = 'geo_table'
    view1 = 'view_{}_{}'.format(dataset1, table1)
    view2 = 'view_{}_{}'.format(dataset2, table2)
    geo_view = 'view_{}_{}'.format(dataset1, geo_table)

    variable1 = Variable({
        'id': '{}.{}.{}.{}'.format(project, dataset1, table1, variable1_name),
        'column_name': column1,
        'dataset_id': 'fake_name'
    })
    variable2 = Variable({
        'id': '{}.{}.{}.{}'.format(project, dataset2, table2, variable2_name),
        'column_name': column2,
        'dataset_id': 'fake_name'
    })
    variables = [variable1, variable2]

    catalog = CatalogEntityWithGeographyMock('{}.{}.{}'.format(
        project, dataset1, geo_table))
    dataset_get_mock.return_value = catalog
    geography_get_mock.return_value = GeographyMock()

    actual_queries = enrichment._get_points_enrichment_sql(
        temp_table_name, variables, [])
    expected_queries = [
        get_query([column1], self.username, view1, geo_view, temp_table_name),
        get_query([column2], self.username, view2, geo_view, temp_table_name)
    ]

    actual = sorted(_clean_queries(actual_queries))
    expected = sorted(_clean_queries(expected_queries))

    assert actual == expected
def test_enrichment_query_by_points_with_filters(self, geography_get_mock,
                                                 dataset_get_mock,
                                                 _is_available_in_bq_mock):
    _is_available_in_bq_mock.return_value = True

    enrichment = Enrichment(credentials=self.credentials)

    temp_table_name = 'test_table'
    project = 'project'
    dataset = 'dataset'
    table = 'table'
    variable_name = 'variable1'
    column = 'column1'
    geo_table = 'geo_table'
    view = 'view_{}_{}'.format(dataset, table)
    geo_view = 'view_{}_{}'.format(dataset, geo_table)

    variable = Variable({
        'id': '{}.{}.{}.{}'.format(project, dataset, table, variable_name),
        'column_name': column,
        'dataset_id': 'fake_name'
    })
    variables = [variable]

    filters = {variable.id: "= 'a string'"}
    expected_filters = ["{} = 'a string'".format(variable.column_name)]

    catalog = CatalogEntityWithGeographyMock('{}.{}.{}'.format(
        project, dataset, geo_table))
    dataset_get_mock.return_value = catalog
    geography_get_mock.return_value = GeographyMock()

    actual_queries = enrichment._get_points_enrichment_sql(
        temp_table_name, variables, filters)
    expected_queries = [
        get_query([column], self.username, view, geo_view,
                  temp_table_name, expected_filters)
    ]

    actual = sorted(_clean_queries(actual_queries))
    expected = sorted(_clean_queries(expected_queries))

    assert actual == expected
def test_enrichment_query_by_points_one_variable(self, geography_get_mock,
                                                 dataset_get_mock):
    enrichment = Enrichment(credentials=self.credentials)

    temp_table_name = 'test_table'
    project = 'project'
    dataset = 'dataset'
    table = 'table'
    variable_name = 'variable1'
    column = 'column1'
    geo_table = 'geo_table'
    view = 'view_{}_{}'.format(dataset, table)
    geo_view = 'view_{}_{}'.format(dataset, geo_table)

    variable = Variable({
        'id': '{}.{}.{}.{}'.format(project, dataset, table, variable_name),
        'column_name': column,
        'dataset_id': 'fake_name'
    })
    variables = [variable]

    catalog = CatalogEntityWithGeographyMock('{}.{}.{}'.format(
        project, dataset, geo_table))
    dataset_get_mock.return_value = catalog
    geography_get_mock.return_value = GeographyMock()

    actual_queries = enrichment._get_points_enrichment_sql(
        temp_table_name, variables, [])
    expected_queries = [
        get_query([column], self.username, view, geo_view, temp_table_name)
    ]

    actual = sorted(_clean_queries(actual_queries))
    expected = sorted(_clean_queries(expected_queries))

    assert actual == expected
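# NOTE (assumption): `get_query` and `_clean_queries` are local test helpers that are not
# part of this excerpt. The assertions above only require them to produce comparable
# strings, so a whitespace-normalizing `_clean_queries` along these lines would suffice
# (a sketch, not the project's actual helper):
#
#     def _clean_queries(queries):
#         return [' '.join(query.split()).lower().strip() for query in queries]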
class TestEnrichment(object):
    def setup_method(self):
        if os.environ.get('APIKEY') and os.environ.get('USERNAME'):
            self.apikey = os.environ['APIKEY']
            self.username = os.environ['USERNAME']
        else:
            creds = json.loads(open('tests/e2e/secret.json').read())
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']

        self.credentials = Credentials(self.username, self.apikey)
        self.enrichment = Enrichment(self.credentials)

        self.points_gdf = read_file(file_path('files/points.geojson'))
        self.polygons_gdf = read_file(file_path('files/polygon.geojson'))

        # from carto-do-public-data.usa_acs.demographics_sociodemographics_usa_censustract_2015_5yrs_20132017
        self.public_variable1 = public_variable1
        self.public_variable2 = public_variable2
        self.public_variable3 = public_variable3
        self.public_variables = [
            self.public_variable1,
            self.public_variable2,
            self.public_variable3
        ]

        # from carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2019
        self.private_variable1 = private_variable1
        self.private_variable2 = private_variable2
        self.private_variable3 = private_variable3
        self.private_variables = [
            self.private_variable1,
            self.private_variable2,
            self.private_variable3
        ]

    def test_points_and_private_data(self):
        enriched_gdf = self.enrichment.enrich_points(
            self.points_gdf,
            variables=self.private_variables)

        expected_gdf = read_file(file_path('files/points-private.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_points_public_data_and_filters(self):
        enriched_gdf = self.enrichment.enrich_points(
            self.points_gdf,
            variables=self.public_variables,
            filters={
                self.public_variable1.id: '< 300',
                self.public_variable2.id: '> 300'
            })

        expected_gdf = read_file(
            file_path('files/points-public-filter.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_and_public_data(self):
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=self.public_variables)

        expected_gdf = read_file(file_path('files/polygon-public.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_private_data_and_agg_none(self):
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=self.public_variables,
            aggregation=None,
            filters={
                self.public_variable1.id: '> 300',
                self.public_variable2.id: '< 800'
            })

        expected_gdf = read_file(
            file_path('files/polygon-public-none.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf,
                                 self.public_variable1.column_name)
        expected_gdf = clean_gdf(expected_gdf,
                                 self.public_variable1.column_name)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_private_data_and_agg_custom(self):
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=[self.private_variable1, self.private_variable2],
            aggregation='AVG')

        expected_gdf = read_file(
            file_path('files/polygon-private-avg.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_public_data_agg_custom_and_filters(self):
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=[self.public_variable1, self.public_variable2],
            aggregation='SUM',
            filters={self.public_variable1.id: '> 500'})

        expected_gdf = read_file(
            file_path('files/polygon-public-agg-custom-filter.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_public_data_and_agg_custom_by_var(self):
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=self.public_variables,
            aggregation={
                self.public_variable1.id: 'SUM',
                self.public_variable2.id: 'COUNT',
                self.public_variable3.id: 'STRING_AGG'
            })

        expected_gdf = read_file(
            file_path('files/polygon-public-agg-custom-by-var.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        # geoid comes with different order in each execution
        enriched_geoids = enriched_gdf["geoid"].values[0].split(',')
        enriched_geoids.sort()
        enriched_gdf["geoid"] = None

        expected_geoids = expected_gdf["geoid"].values[0].split(',')
        expected_geoids.sort()
        expected_gdf["geoid"] = None

        assert enriched_gdf.equals(expected_gdf)
        assert enriched_geoids == expected_geoids
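# For reference, the Enrichment API exercised by these e2e tests can be called the same
# way outside the test harness (a minimal sketch; the username, API key, file name and
# variable id below are placeholders, not real values):
#
#     from geopandas import read_file
#     from cartoframes.auth import Credentials
#     from cartoframes.data.observatory import Enrichment, Variable
#
#     credentials = Credentials('my_username', 'my_api_key')
#     variable = Variable.get('a_catalog_variable_id')  # id from the Data Observatory catalog
#     gdf = read_file('my_points.geojson')
#
#     enriched_gdf = Enrichment(credentials).enrich_points(gdf, variables=[variable])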