示例#1
0
    def setup_method(self):
        """Prepare credentials, the enrichment client and fixtures per test.

        Credentials come from the ``APIKEY`` and ``USERNAME`` environment
        variables when both are set to non-empty values; otherwise they are
        read from ``tests/e2e/secret.json``, which must then provide the
        ``APIKEY`` and ``USERNAME`` keys.

        Raises:
            IOError/OSError: if the fallback secret file cannot be opened.
            KeyError: if the secret file lacks one of the expected keys.
        """
        env_apikey = os.environ.get('APIKEY')
        env_username = os.environ.get('USERNAME')

        if env_apikey and env_username:
            self.apikey = env_apikey
            self.username = env_username
        else:
            # Use a context manager so the file handle is always released,
            # even when the JSON payload is malformed.
            with open('tests/e2e/secret.json') as secret_file:
                creds = json.load(secret_file)
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']

        self.credentials = Credentials(self.username, self.apikey)
        self.enrichment = Enrichment(self.credentials)

        # Input geometries that the tests enrich.
        self.points_gdf = read_file(file_path('files/points.geojson'))
        self.polygons_gdf = read_file(file_path('files/polygon.geojson'))

        # from carto-do-public-data.usa_acs.demographics_sociodemographics_usa_censustract_2015_5yrs_20132017
        self.public_variable1 = public_variable1
        self.public_variable2 = public_variable2
        self.public_variable3 = public_variable3
        self.public_variables = [
            self.public_variable1, self.public_variable2, self.public_variable3
        ]

        # from carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2019
        self.private_variable1 = private_variable1
        self.private_variable2 = private_variable2
        self.private_variable3 = private_variable3
        self.private_variables = [
            self.private_variable1, self.private_variable2,
            self.private_variable3
        ]
    def test_enrichment_query_by_points_two_variables_different_datasets(
            self, geography_get_mock, dataset_get_mock):
        """Two variables from different datasets yield one query per dataset.

        The mock arguments are presumably injected by patch decorators that
        are not visible in this excerpt.
        """
        enricher = Enrichment(credentials=self.credentials)

        tmp_table = 'test_table'
        project_id = 'project'
        geo_table_id = 'geo_table'
        # One (dataset, table, variable name, column) entry per variable.
        specs = [
            ('dataset1', 'table1', 'variable1', 'column1'),
            ('dataset2', 'table2', 'variable2', 'column2'),
        ]

        requested = [
            Variable({
                'id': '{}.{}.{}.{}'.format(project_id, ds, tbl, var_name),
                'column_name': col,
                'dataset_id': 'fake_name',
            })
            for ds, tbl, var_name, col in specs
        ]

        # The geography table is placed in the first dataset.
        geo_dataset = specs[0][0]
        dataset_get_mock.return_value = CatalogEntityWithGeographyMock(
            '{}.{}.{}'.format(project_id, geo_dataset, geo_table_id))
        geography_get_mock.return_value = GeographyMock()

        generated = enricher._get_points_enrichment_sql(
            tmp_table, requested, [])

        geo_view = 'view_{}_{}'.format(geo_dataset, geo_table_id)
        wanted = [
            get_query([col], self.username, 'view_{}_{}'.format(ds, tbl),
                      geo_view, tmp_table)
            for ds, tbl, _, col in specs
        ]

        # Query order is not significant, so compare sorted lists.
        assert sorted(_clean_queries(generated)) == \
            sorted(_clean_queries(wanted))
    def test_enrichment_query_by_points_with_filters(self, geography_get_mock,
                                                     dataset_get_mock,
                                                     _is_available_in_bq_mock):
        """A filter keyed by variable id appears in the generated points query.

        The mock arguments are presumably injected by patch decorators that
        are not visible in this excerpt.
        """
        _is_available_in_bq_mock.return_value = True
        enricher = Enrichment(credentials=self.credentials)

        tmp_table = 'test_table'
        project_id, dataset_id, table_id = 'project', 'dataset', 'table'
        column_name = 'column1'
        geo_table_id = 'geo_table'

        var = Variable({
            'id': '{}.{}.{}.{}'.format(
                project_id, dataset_id, table_id, 'variable1'),
            'column_name': column_name,
            'dataset_id': 'fake_name',
        })

        # Filters are keyed by variable id; the expected SQL condition is
        # built from the variable's column name.
        filters = {var.id: "= 'a string'"}
        expected_filters = ["{} = 'a string'".format(var.column_name)]

        dataset_get_mock.return_value = CatalogEntityWithGeographyMock(
            '{}.{}.{}'.format(project_id, dataset_id, geo_table_id))
        geography_get_mock.return_value = GeographyMock()

        generated = enricher._get_points_enrichment_sql(
            tmp_table, [var], filters)

        data_view = 'view_{}_{}'.format(dataset_id, table_id)
        geo_view = 'view_{}_{}'.format(dataset_id, geo_table_id)
        wanted = [
            get_query([column_name], self.username, data_view, geo_view,
                      tmp_table, expected_filters)
        ]

        assert sorted(_clean_queries(generated)) == \
            sorted(_clean_queries(wanted))
    def test_enrichment_query_by_points_one_variable(self, geography_get_mock,
                                                     dataset_get_mock):
        """A single variable yields exactly one points-enrichment query.

        The mock arguments are presumably injected by patch decorators that
        are not visible in this excerpt.
        """
        enricher = Enrichment(credentials=self.credentials)

        tmp_table = 'test_table'
        project_id, dataset_id, table_id = 'project', 'dataset', 'table'
        column_name = 'column1'
        geo_table_id = 'geo_table'

        var = Variable({
            'id': '{}.{}.{}.{}'.format(
                project_id, dataset_id, table_id, 'variable1'),
            'column_name': column_name,
            'dataset_id': 'fake_name',
        })

        dataset_get_mock.return_value = CatalogEntityWithGeographyMock(
            '{}.{}.{}'.format(project_id, dataset_id, geo_table_id))
        geography_get_mock.return_value = GeographyMock()

        generated = enricher._get_points_enrichment_sql(tmp_table, [var], [])

        data_view = 'view_{}_{}'.format(dataset_id, table_id)
        geo_view = 'view_{}_{}'.format(dataset_id, geo_table_id)
        wanted = [
            get_query([column_name], self.username, data_view, geo_view,
                      tmp_table)
        ]

        assert sorted(_clean_queries(generated)) == \
            sorted(_clean_queries(wanted))
示例#5
0
class TestEnrichment(object):
    """End-to-end checks of ``Enrichment`` against stored GeoJSON fixtures.

    Each test enriches the point or polygon fixture and compares the result
    with an expected GeoJSON file after both have gone through ``clean_gdf``.
    """

    def setup_method(self):
        """Prepare credentials, the enrichment client and fixtures per test.

        Credentials come from the ``APIKEY`` and ``USERNAME`` environment
        variables when both are set to non-empty values; otherwise they are
        read from ``tests/e2e/secret.json``, which must then provide the
        ``APIKEY`` and ``USERNAME`` keys.
        """
        env_apikey = os.environ.get('APIKEY')
        env_username = os.environ.get('USERNAME')

        if env_apikey and env_username:
            self.apikey = env_apikey
            self.username = env_username
        else:
            # Use a context manager so the file handle is always released,
            # even when the JSON payload is malformed.
            with open('tests/e2e/secret.json') as secret_file:
                creds = json.load(secret_file)
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']

        self.credentials = Credentials(self.username, self.apikey)
        self.enrichment = Enrichment(self.credentials)

        # Input geometries that the tests enrich.
        self.points_gdf = read_file(file_path('files/points.geojson'))
        self.polygons_gdf = read_file(file_path('files/polygon.geojson'))

        # from carto-do-public-data.usa_acs.demographics_sociodemographics_usa_censustract_2015_5yrs_20132017
        self.public_variable1 = public_variable1
        self.public_variable2 = public_variable2
        self.public_variable3 = public_variable3
        self.public_variables = [
            self.public_variable1, self.public_variable2, self.public_variable3
        ]

        # from carto-do.ags.demographics_retailpotential_usa_blockgroup_2015_yearly_2019
        self.private_variable1 = private_variable1
        self.private_variable2 = private_variable2
        self.private_variable3 = private_variable3
        self.private_variables = [
            self.private_variable1, self.private_variable2,
            self.private_variable3
        ]

    def test_points_and_private_data(self):
        """Points enriched with the private variables match the fixture."""
        enriched_gdf = self.enrichment.enrich_points(
            self.points_gdf, variables=self.private_variables)

        expected_gdf = read_file(file_path('files/points-private.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_points_public_data_and_filters(self):
        """Points enriched with filtered public variables match the fixture."""
        enriched_gdf = self.enrichment.enrich_points(
            self.points_gdf,
            variables=self.public_variables,
            filters={
                self.public_variable1.id: '< 300',
                self.public_variable2.id: '> 300'
            })

        expected_gdf = read_file(
            file_path('files/points-public-filter.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_and_public_data(self):
        """Polygons enriched with the public variables match the fixture."""
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf, variables=self.public_variables)

        expected_gdf = read_file(file_path('files/polygon-public.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_private_data_and_agg_none(self):
        """Polygons enriched with ``aggregation=None`` match the fixture.

        NOTE(review): despite "private" in the name, this test uses the
        public variables and a public fixture file. The name is kept so the
        test identifier does not change; confirm the intended dataset.
        """
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=self.public_variables,
            aggregation=None,
            filters={
                self.public_variable1.id: '> 300',
                self.public_variable2.id: '< 800'
            })

        expected_gdf = read_file(
            file_path('files/polygon-public-none.geojson'))

        # The first public variable's column name is handed to clean_gdf as
        # an extra argument on both sides of the comparison.
        enriched_gdf = clean_gdf(enriched_gdf,
                                 self.public_variable1.column_name)
        expected_gdf = clean_gdf(expected_gdf,
                                 self.public_variable1.column_name)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_private_data_and_agg_custom(self):
        """Polygons enriched with private variables and 'AVG' match."""
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=[self.private_variable1, self.private_variable2],
            aggregation='AVG')

        expected_gdf = read_file(
            file_path('files/polygon-private-avg.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_public_data_agg_custom_and_filters(self):
        """Polygons enriched with 'SUM' and a filter match the fixture."""
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=[self.public_variable1, self.public_variable2],
            aggregation='SUM',
            filters={self.public_variable1.id: '> 500'})

        expected_gdf = read_file(
            file_path('files/polygon-public-agg-custom-filter.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        assert enriched_gdf.equals(expected_gdf)

    def test_polygons_public_data_and_agg_custom_by_var(self):
        """Polygons enriched with a per-variable aggregation map match."""
        enriched_gdf = self.enrichment.enrich_polygons(
            self.polygons_gdf,
            variables=self.public_variables,
            aggregation={
                self.public_variable1.id: 'SUM',
                self.public_variable2.id: 'COUNT',
                self.public_variable3.id: 'STRING_AGG'
            })

        expected_gdf = read_file(
            file_path('files/polygon-public-agg-custom-by-var.geojson'))

        enriched_gdf = clean_gdf(enriched_gdf)
        expected_gdf = clean_gdf(expected_gdf)

        # geoid comes with different order in each execution, so compare its
        # comma-separated parts as sorted lists and blank the column before
        # the frame-level comparison.
        enriched_geoids = sorted(enriched_gdf["geoid"].values[0].split(','))
        enriched_gdf["geoid"] = None
        expected_geoids = sorted(expected_gdf["geoid"].values[0].split(','))
        expected_gdf["geoid"] = None

        assert enriched_gdf.equals(expected_gdf)
        assert enriched_geoids == expected_geoids