Example #1
# DODataset comes from cartoframes; the api_key_auth_client_usr fixture is
# defined in the cartoframes test suite.
def test_DODataset_name(api_key_auth_client_usr):
    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    name = 'fake-name'
    result = bq_user_dataset.name(name)
    assert bq_user_dataset == result
    assert bq_user_dataset._name == name
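Note that the test asserts name() returns the very same DODataset instance; that is what enables the fluent chaining used in the later examples (.name(...).column(...).ttl_seconds(...)).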
Example #2
from io import StringIO

from carto.auth import APIKeyAuthClient

# DODataset and ResponseStream come from cartoframes; ResponseMock and
# CSV_SAMPLE_REDUCED are helpers defined in the test module, and mocker is
# the pytest-mock fixture.
def test_can_download_to_dataframe(mocker, api_key_auth_client_usr):
    # mock
    fake_response = ResponseMock(StringIO(CSV_SAMPLE_REDUCED))
    mocker.patch.object(APIKeyAuthClient, 'send', return_value=fake_response)

    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    # test
    result = bq_user_dataset.name(
        'census_tracts_american_samoa').download_stream()
    assert isinstance(result, ResponseStream)
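download_stream() returns a file-like ResponseStream rather than a DataFrame; as Example #5 shows, it can be fed straight into pandas.read_csv.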
Example #3
import uuid
from io import StringIO

from carto.auth import APIKeyAuthClient

def test_can_upload_from_file_object(mocker, api_key_auth_client_usr):
    # mock
    fake_response = ResponseMock()
    mocker.patch.object(APIKeyAuthClient, 'send', return_value=fake_response)

    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    # test
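    # a uuid suffix keeps the table name unique across test runs; hyphens are
    # swapped for underscores to keep the name a valid table identifier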
    unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace('-', '_')
    file_object = StringIO(CSV_SAMPLE_REDUCED)
    result = bq_user_dataset.name(unique_table_name).upload_file_object(
        file_object)
    assert result == fake_response
Example #4
import uuid
from io import StringIO

from carto.auth import APIKeyAuthClient

def test_can_import_a_dataset(mocker, api_key_auth_client_usr):
    # mock
    fake_response = ResponseMock({'item_queue_id': '123'})
    mocker.patch.object(APIKeyAuthClient, 'send', return_value=fake_response)

    bq_user_dataset = DODataset(auth_client=api_key_auth_client_usr)

    # test
    unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace('-', '_')
    file_object = StringIO(CSV_SAMPLE_REDUCED)

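    # fluent builder: declare the schema and a TTL before creating the table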
    dataset = bq_user_dataset.name(unique_table_name) \
        .column(name='id', type='INT64') \
        .column('geom', 'GEOMETRY') \
        .ttl_seconds(30)
    dataset.create()
    dataset.upload_file_object(file_object)
    job = dataset.import_dataset()

    assert isinstance(job, DODatasetJob)
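The import workflow is create() the table, upload the rows, then import_dataset(), which returns a DODatasetJob handle; Example #5 shows how that job is polled with status() or awaited with result().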
Example #5
import json
import os
import unittest
import uuid
from io import StringIO

import geopandas
import pandas
from geopandas import read_file
from shapely import wkt

from cartoframes.auth import Credentials

# DODataset, DODatasetJob, the GEOM_TYPE_* constants and the test helpers
# (CSV_SAMPLE_REDUCED, EXPECTED_CSV_SAMPLE, file_path, _ENRICHMENT_ID,
# _GEOM_COLUMN, _TTL_IN_SECONDS) are provided by cartoframes and its test suite.


class TestDODataset(unittest.TestCase):
    """End-to-end suite for DODataset. It needs credentials for a working DO API,
    read either from the APIKEY/USERNAME/USERURL environment variables or from
    "tests/e2e/secret.json". The DO API must have the user/API-key mapping set up
    to grant access to the user's DO Project in GCP.
    """
    def setUp(self):
        if os.environ.get('APIKEY') and os.environ.get(
                'USERNAME') and os.environ.get('USERURL'):
            self.apikey = os.environ['APIKEY']
            self.username = os.environ['USERNAME']
            self.base_url = os.environ['USERURL']
        else:
            with open('tests/e2e/secret.json') as f:
                creds = json.load(f)
            self.apikey = creds['APIKEY']
            self.username = creds['USERNAME']
            self.base_url = creds['USERURL']

        credentials = Credentials(username=self.username,
                                  api_key=self.apikey,
                                  base_url=self.base_url)
        auth_client = credentials.get_api_key_auth_client()
        self.do_dataset = DODataset(auth_client=auth_client)

    def test_can_upload_from_dataframe(self):
        sample = StringIO(CSV_SAMPLE_REDUCED)
        df = pandas.read_csv(sample)
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        self.do_dataset.name(unique_table_name).upload(df)

    def test_can_upload_from_file_object(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)
        self.do_dataset.name(unique_table_name).upload_file_object(file_object)

    def test_can_import_a_dataset(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)

        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        dataset.upload_file_object(file_object)
        job = dataset.import_dataset()

        self.assertIsInstance(job, DODatasetJob)

    def test_can_get_status_from_import(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)

        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        dataset.upload_file_object(file_object)
        job = dataset.import_dataset()
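        # status() reports the job's current state without waiting for it to finish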
        status = job.status()

        self.assertIn(
            status, ['pending', 'running', 'cancelled', 'success', 'failure'])

    def test_can_wait_for_job_completion(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        file_object = StringIO(CSV_SAMPLE_REDUCED)

        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
        dataset.upload_file_object(file_object, geom_column='geom')
        job = dataset.import_dataset()
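        # result() blocks until the import job completes and returns its final status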
        status = job.result()

        self.assertIn(status, ['success'])

    def test_can_upload_a_dataframe_and_wait_for_completion(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        sample = StringIO(CSV_SAMPLE_REDUCED)
        df = pandas.read_csv(sample)

        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='id', type='INT64') \
            .column('geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()
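        # upload_dataframe() uploads the rows and waits for the import to finish,
        # returning the job's final status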
        status = dataset.upload_dataframe(df, geom_column='geom')

        self.assertIn(status, ['success'])

    def test_can_download_to_dataframe(self):
        result = self.do_dataset.name(
            'census_tracts_american_samoa').download_stream()
        df = pandas.read_csv(result)

        self.assertEqual(df.shape, (18, 13))

        # do some checks on the contents
        sample = pandas.DataFrame(df.head(),
                                  columns=('state_fips_code',
                                           'county_fips_code', 'geo_id',
                                           'tract_name', 'internal_point_geo'))
        sample['internal_point_geo'] = df['internal_point_geo'].apply(
            wkt.loads)
        geosample = geopandas.GeoDataFrame(sample,
                                           geometry='internal_point_geo')

        self.assertEqual(geosample.to_csv(index=False), EXPECTED_CSV_SAMPLE)

    def test_creation_of_dataset(self):
        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')

        dataset = self.do_dataset.name(unique_table_name) \
            .column(name='cartodb_id', type='INT64') \
            .column('the_geom', 'GEOMETRY') \
            .ttl_seconds(30)
        dataset.create()

        # do a quick check on the resulting table
        result = dataset.download_stream()
        df = pandas.read_csv(result)
        self.assertEqual(df.shape, (0, 2))
        self.assertEqual(df.to_csv(index=False), 'cartodb_id,the_geom\n')

    def test_points_enrichment_dataset(self):
        variable_slug = 'poverty_a86da569'
        variable_column_name = 'poverty'

        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        gdf = read_file(
            file_path('../observatory/enrichment/files/points.geojson'))
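        # tag each row with an enrichment id and keep only the id and geometry columns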
        gdf[_ENRICHMENT_ID] = range(gdf.shape[0])
        gdf[_GEOM_COLUMN] = gdf.geometry
        gdf = gdf[[_ENRICHMENT_ID, _GEOM_COLUMN]]

        dataset = self.do_dataset.name(unique_table_name) \
            .column(_ENRICHMENT_ID, 'INT64') \
            .column(_GEOM_COLUMN, 'GEOMETRY') \
            .ttl_seconds(_TTL_IN_SECONDS)
        dataset.create()
        status = dataset.upload_dataframe(gdf, geom_column=_GEOM_COLUMN)
        self.assertIn(status, ['success'])

        geom_type = GEOM_TYPE_POINTS
        variables = [variable_slug]
        output_name = '{}_result'.format(unique_table_name)
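        # enrich the uploaded geometries with the requested variables; the result is
        # written to a new table named output_name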
        status = dataset.enrichment(geom_type=geom_type,
                                    variables=variables,
                                    output_name=output_name)

        self.assertIn(status, ['success'])

        result = self.do_dataset.name(output_name).download_stream()
        result_df = pandas.read_csv(result)

        self.assertIn(variable_column_name, result_df.columns)

    def test_polygons_enrichment_dataset(self):
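        # mirrors test_points_enrichment_dataset, but with polygon geometries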
        variable_slug = 'poverty_a86da569'
        variable_column_name = 'poverty'

        unique_table_name = 'cf_test_table_' + str(uuid.uuid4()).replace(
            '-', '_')
        gdf = read_file(
            file_path('../observatory/enrichment/files/polygon.geojson'))
        gdf[_ENRICHMENT_ID] = range(gdf.shape[0])
        gdf[_GEOM_COLUMN] = gdf.geometry
        gdf = gdf[[_ENRICHMENT_ID, _GEOM_COLUMN]]

        dataset = self.do_dataset.name(unique_table_name) \
            .column(_ENRICHMENT_ID, 'INT64') \
            .column(_GEOM_COLUMN, 'GEOMETRY') \
            .ttl_seconds(_TTL_IN_SECONDS)
        dataset.create()
        status = dataset.upload_dataframe(gdf, geom_column=_GEOM_COLUMN)
        self.assertIn(status, ['success'])

        geom_type = GEOM_TYPE_POLYGONS
        variables = [variable_slug]
        output_name = '{}_result'.format(unique_table_name)
        status = dataset.enrichment(geom_type=geom_type,
                                    variables=variables,
                                    output_name=output_name)

        self.assertIn(status, ['success'])

        result = self.do_dataset.name(output_name).download_stream()
        df = pandas.read_csv(result)

        self.assertIn(variable_column_name, df.columns)
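Taken together, the examples follow a single workflow. Below is a condensed sketch of it, assuming the same DODataset and Credentials API as in the tests above; the table name, file name and credential values are placeholders.

import uuid

from cartoframes.auth import Credentials

credentials = Credentials(username='<user>', api_key='<key>', base_url='<url>')
auth_client = credentials.get_api_key_auth_client()

table_name = 'my_table_' + str(uuid.uuid4()).replace('-', '_')
dataset = DODataset(auth_client=auth_client).name(table_name) \
    .column(name='id', type='INT64') \
    .column('geom', 'GEOMETRY') \
    .ttl_seconds(30)
dataset.create()  # create the table with the declared schema and TTL

with open('rows.csv') as file_object:
    dataset.upload_file_object(file_object)  # stream the rows into it

job = dataset.import_dataset()  # returns a DODatasetJob handle
print(job.result())  # block until the job finishes; 'success' when all went well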