Example #1
    def __init__(self):
        self.data = read_carto('meuse')
        self.data['log_zinc'] = np.log(self.data['zinc'])
        self.data = self.data.to_crs({'init': 'epsg:28992'})
        self.data_lonlat = self.data.to_crs({'init': 'epsg:4326'})

        self.data_grid = read_carto('meuse_grid')
        self.data_grid = self.data_grid.to_crs({'init': 'epsg:28992'})
        self.data_grid_lonlat = self.data_grid.to_crs({'init': 'epsg:4326'})
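The snippets on this page assume a CARTO connection has already been configured; a minimal setup sketch (the account name and API key are placeholders, not from the original code):

import numpy as np                                   # used above for np.log
from cartoframes import read_carto
from cartoframes.auth import set_default_credentials

set_default_credentials('my_carto_account', 'my_api_key')  # placeholder credentials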
Example #2
    def test_augment_with_persist_as(self):
        """DataObsClient.augment with persist_as"""
        do = DataObsClient(self.credentials)

        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        gdf = do.augment(self.test_data_table, meta)
        anscols = set(meta['suggested_name'])
        origcols = set(
            read_carto(self.test_data_table,
                       credentials=self.credentials,
                       limit=1,
                       decode_geom=True).columns)
        self.assertSetEqual(
            anscols,
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        meta = [
            {
                'numer_id': 'us.census.acs.B19013001',
                'geom_id': 'us.census.tiger.block_group',
                'numer_timespan': '2011 - 2015'
            },
        ]
        gdf = do.augment(self.test_data_table,
                         meta,
                         persist_as=self.test_write_table)
        self.assertSetEqual(
            set(('median_income_2011_2015', )),
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})
        self.assertEqual(gdf.index.name, 'cartodb_id')
        self.assertEqual(gdf.index.dtype, 'int64')

        df = read_carto(self.test_write_table,
                        credentials=self.credentials,
                        decode_geom=False)

        self.assertEqual(df.index.name, 'cartodb_id')
        self.assertEqual(df.index.dtype, 'int64')

        # same number of rows
        self.assertEqual(len(df), len(gdf), msg='Expected same number of rows')

        # same type of object
        self.assertIsInstance(df, pd.DataFrame, 'Should be a pandas DataFrame')
        # same column names
        self.assertSetEqual(set(gdf.columns.values),
                            set(df.columns.values),
                            msg='Should have the columns requested')

        # should have expected schema
        self.assertEqual(sorted(tuple(str(d) for d in df.dtypes)),
                         sorted(tuple(str(d) for d in gdf.dtypes)),
                         msg='Should have same schema/types')
Example #3
 def __init__(self, var, var_value):
     self.filename = '/tmp/UK_Police_street_crimes_2019_04.csv'
     self.data = read_carto('uk_police_street_crimes_2019_04')
     self.data = self.data[self.data[var] == var_value]
     self.data_lonlat = self.data
     self.data_lonlat = read_carto('''
         SELECT c.*
           FROM uk_police_street_crimes_2019_04 as c
           JOIN london_borough_excluding_mhw as g
           ON ST_Intersects(c.the_geom, g.the_geom)
     ''')
     self.data = self.data.to_crs({'init': 'epsg:32630'})
Example #4
    def test_isochrones_from_table_query_as_new_table(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)

        gdf = read_carto(self.points_query())

        result_table_name = self.get_test_table_name('isotbr')

        quota = self.used_quota(iso)

        # Preview
        result = iso.isochrones(gdf, [100, 1000],
                                mode='car',
                                table_name=result_table_name,
                                dry_run=True,
                                exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(gdf, [100, 1000],
                                mode='car',
                                table_name=result_table_name,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        self.assertTrue(result.is_remote())
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        result_columns = result.get_column_names()
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(result.get_num_rows(), 6)
        self.assertTrue('cartodb_id' in result_columns)
        self.assertFalse('source_id' in result_columns)
Example #5
 def __init__(self):
     self.data_carto = read_carto('boston_housing')
     ## Renaming the geometry column from 'the_geom' to 'geometry' 
     ## (pysal expects the geometry column to be called 'geometry')
     self.data = self.data_carto.copy()
     self.data['geometry'] = self.data.geometry
     self.data.drop(['the_geom'],axis = 1, inplace = True)
     self.data = gpd.GeoDataFrame(self.data, geometry = 'geometry')
     self.w = Queen.from_dataframe(self.data)
Example #6
def get_table(tablename):
    """Retrieve tablename as a GeoDataFrame ordered by database id

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame representation of table
    """
    base_query = ("SELECT * FROM {tablename} ORDER BY cartodb_id ASC").format(
        tablename=tablename)
    data_carto = read_carto(base_query)
    ## Renaming the geometry column from 'the_geom' to 'geometry'
    ## (pysal expects the geometry column to be called 'geometry')
    data = data_carto.copy()
    data['geometry'] = data.geometry
    data.drop(['the_geom'], axis=1, inplace=True)
    data = gpd.GeoDataFrame(data, geometry='geometry')
    data.crs = {"init": "epsg:4326"}

    return data
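A usage sketch for the helper above (the table name is hypothetical; assumes default CARTO credentials are already set):

from libpysal.weights import Queen        # pysal-style weights, the reason for the 'geometry' rename

data = get_table('my_polygon_table')      # hypothetical table name
w = Queen.from_dataframe(data)            # contiguity weights built from the returned GeoDataFrame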
def get_carto_years(carto_table, data_col):
    '''
    Given a Carto table name and a column where we expect to have data, this function will return a list of all the
    years for which there is data in the table
    INPUT   carto_table: name of Carto table (string)
            data_col: name of column where we want to make sure we have data (string)
    RETURN  carto_years: years in table for which we have data (list of integers)
    '''

    # if there are multiple data columns to check
    if ';' in data_col:
        # turn columns into a list
        cols = data_col.split(';')
        # generate a WHERE statement to use in SQL query to remove rows where these columns are null
        where = ''
        for col in cols:
            where += col + ' IS NOT NULL AND '
        where = where[:-5]
    # if there is only one column to check
    else:
        # generate a WHERE statement to use in SQL query to remove rows where this column is null
        where = data_col +  ' IS NOT NULL'
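    # e.g. data_col = 'col_a' (hypothetical) gives "col_a IS NOT NULL";
    # 'col_a;col_b' gives "col_a IS NOT NULL AND col_b IS NOT NULL"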
    # query Carto table to get rows where there is data
    carto_df = cartoframes.read_carto(f' SELECT * from {carto_table} WHERE {where}', credentials=auth)

    # pull out a list of years from the 'year' column
    carto_years = [int(year) for year in np.unique(carto_df['year'])]

    # get count of occurrences of each year
    vc = carto_df['year'].value_counts()
    # pull out list of years with fewer than 10 data points
    years_to_drop = vc[vc < 10].index
    # keep list of these years that are more than 10 years old
    years_to_drop = [year for year in years_to_drop if year < datetime.datetime.utcnow().year - 10]
    # remove years with fewer than 10 data points if they are more than 10 years old
    carto_years = [year for year in carto_years if year not in years_to_drop]

    # put these years in order from oldest to newest
    carto_years.sort()
    return carto_years
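A usage sketch with hypothetical table/column names (assumes `auth` holds valid CARTO credentials, as the function body expects):

years = get_carto_years('my_indicator_table', 'indicator_value')   # single data column
years = get_carto_years('my_indicator_table', 'col_a;col_b')       # several columns, ';'-separated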
Example #8
    def test_augment(self):
        """DataObsClient.augment"""
        do = DataObsClient(self.credentials)

        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        gdf = do.augment(self.test_data_table, meta)
        anscols = set(meta['suggested_name'])
        origcols = set(
            read_carto(self.test_data_table,
                       credentials=self.credentials,
                       limit=1,
                       decode_geom=True).columns)
        self.assertSetEqual(
            anscols,
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        meta = [
            {
                'numer_id': 'us.census.acs.B19013001',
                'geom_id': 'us.census.tiger.block_group',
                'numer_timespan': '2011 - 2015'
            },
        ]
        gdf = do.augment(self.test_data_table, meta)
        self.assertSetEqual(
            set(('median_income_2011_2015', )),
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        with self.assertRaises(ValueError, msg='no measures'):
            meta = do.discovery('United States', keywords='not a measure')
            do.augment(self.test_read_table, meta)

        with self.assertRaises(ValueError, msg='too many metadata measures'):
            # returns ~180 measures
            meta = do.discovery(region='united states', keywords='education')
            do.augment(self.test_read_table, meta)
Example #9
    def test_isochrones_from_dataframe_as_new_table(self):
        self.skip(if_no_credits=True, if_no_credentials=True)
        iso = Isolines(credentials=self.credentials)

        df = DataFrame(self.points, columns=['name', 'the_geom'])

        quota = self.used_quota(iso)

        table_name = self.get_test_table_name('isodfds')

        # Preview
        result = iso.isochrones(df, [100, 1000],
                                mode='car',
                                table_name=table_name,
                                dry_run=True,
                                exclusive=True).metadata
        self.assertEqual(result.get('required_quota'), 6)
        self.assertEqual(self.used_quota(iso), quota)

        # Isochrones
        result = iso.isochrones(df, [100, 1000],
                                mode='car',
                                table_name=table_name,
                                exclusive=True).data
        self.assertTrue(isinstance(result, GeoDataFrame))
        quota += 6
        self.assertEqual(self.used_quota(iso), quota)
        self.assertTrue('the_geom' in result)
        self.assertTrue('data_range' in result)
        self.assertEqual(len(result.index), 6)

        gdf = read_carto(table_name, credentials=self.credentials)

        result_columns = gdf.columns
        self.assertTrue('the_geom' in result_columns)
        self.assertTrue('data_range' in result_columns)
        self.assertEqual(len(gdf.index), 6)
        self.assertTrue('source_id' in result_columns)
Example #10
    df_resp['pfaf_id_12'] = row['pfaf_id_12']
    df_resp['hybas_id_6'] = row['hybas_id_6']
    df_resp['hybas_id_3'] = row['hybas_id_3']
    df_resp['variable'] = variable
    df_resp['depth'] = depth
    cols_reorder = [
        'longitude', 'latitude', 'gridCentreLon', 'gridCentreLat', 'dt',
        'variable', 'depth', 'hyriv_id', 'pfaf_id_12', 'hybas_id_6',
        'hybas_id_3'
    ]
    df_resp = df_resp[cols_reorder]
    return df_resp


logger.debug('Pull river mouth data from Carto')
gdf_mouths = read_carto('ocn_calcs_010_target_river_mouths')

results = []
logger.info('Initiate multithreading for WMS requests')
# request all records for all permutations, combine into single dataframe
with concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor:
    args_list = []
    for index, row in gdf_mouths.iterrows():
        args_list.append([index, row, variables[-1], depths[-1]])
    future_to_args = {
        executor.submit(pull_data, args[1], args[2], args[3]): args
        for args in args_list
    }
    for future in concurrent.futures.as_completed(future_to_args):
        args = future_to_args[future]
        try:
            # collect the dataframe returned by pull_data for this permutation
            results.append(future.result())
        except Exception as exc:
            logger.error(f'request {args} generated an exception: {exc}')
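# the snippet is cut off here; the "combine into single dataframe" step referenced above
# would plausibly be something like the following (assumes pandas is imported as pd):
df_all = pd.concat(results, ignore_index=True)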
Example #11
}
RETRIEVE_URL_HEADERS = {
    'Authorization':os.getenv('GFW_API_KEY')
}

WORKING_DIR = os.path.join(os.getenv('DOWNLOAD_DIR'), 'gfw-api-data')
Path(WORKING_DIR).mkdir(parents=True, exist_ok=True)

logger.debug('Retrieve polygons from Carto')
# retrieve polygons from carto
eez_table = 'com_011_rw1_maritime_boundaries_edit'
# dataset includes following types:
# '12NM', '24NM', '200NM', 'Overlapping claim', 'Joint regime'
# the final three are of potential relevance here
# collect the data for them all, but maintain the distinction for later ease
gdf_zones = read_carto("SELECT *, ST_AsGeoJSON(the_geom) AS the_geom_geojson FROM com_011_rw1_maritime_boundaries_edit WHERE pol_type IN ('Overlapping claim','200NM','Joint regime') AND mrgid=8464",
        index_col='cartodb_id')
gdf_zones = gdf_zones.astype({'mrgid':'int','mrgid_ter1':'int','mrgid_sov1':'int',
        'mrgid_eez':'int',})
gdf_zones['json'] = gdf_zones.the_geom.to_json()

# create set of pairs of dates to loop through
date_pairs = [(date(year,1,1), date(year,12,31)) for year in range(2012, 2022)]
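# e.g. the first pair is (date(2012, 1, 1), date(2012, 12, 31)); the last covers 2021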

# create object to track api activity & results
# mrgid, geoname, year, id, url, zip, csv, value
# needed from original table: geoname, pol_type, iso_ter1, iso_sov1, iso_ter2, iso_sov2, iso_ter3, iso_sov3
col_type_dict = {
    'mrgid':'int',
    'geoname':'str',
    'pol_type':'str',
    'iso_ter1':'str',
    zip_file = zip_file_template.format(region_id)
    with zipfile.ZipFile(zip_file, 'r') as zip:
        zip.extractall(local_data_dir) 

gdf_l12 = None   
for region_id in region_ids:
    shp_file = shp_file_template.format(region_id)
    gdf_reg = gpd.read_file(shp_file, ignore_geometry=True)
    if gdf_l12 is None:
        gdf_l12 = gdf_reg
    else:
        gdf_l12 = gdf_l12.append(gdf_reg, ignore_index=True, verify_integrity=True, sort=False)
gdf_l12 = gdf_l12.astype({'HYBAS_ID':'str','PFAF_ID':'str'})

# load processed river mouth dataset
gdf_mouths = read_carto('ocn_calcs_010_target_river_mouths')
if 'pfaf_id_12' in gdf_mouths.columns:
    gdf_mouths.drop(columns=['pfaf_id_12','pfaf_id_5','pfaf_id_3','hybas_id_5','hybas_id_3'], inplace=True, errors='ignore')

# join level12 river basin data to river mouth data
# allows us to get the pfaf_id for each terminal river mouth level 12 basin
gdf_mouths = gdf_mouths.merge(gdf_l12, how='left', left_on='hybas_l12', right_on='HYBAS_ID', 
        sort=False, validate='many_to_one')
gdf_mouths.drop(columns=['HYBAS_ID', 'NEXT_DOWN',
       'NEXT_SINK', 'MAIN_BAS', 'DIST_SINK', 'DIST_MAIN', 'SUB_AREA',
       'UP_AREA', 'ENDO', 'COAST', 'ORDER', 'SORT'], inplace=True, errors='raise')
gdf_mouths.rename(columns={'PFAF_ID': 'pfaf_id_12'}, inplace=True, errors='raise')

# use level 12 basin pfaf_id to find the corresponding level 5 basin
gdf_mouths['pfaf_id_5'] = gdf_mouths['pfaf_id_12'].str.slice(stop=5)
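# e.g. a (hypothetical) 12-digit level-12 pfaf_id of '111011230000' yields pfaf_id_5 '11101'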
gdf_l5 = read_carto("SELECT hybas_id::text, pfaf_id::text FROM wat_068_rw0_watersheds_edit WHERE level=5")