def fix_carto_geofields(asset_id=None):
    """Recompute the_geom and the_geom_webmercator for rows in the Carto table.

    Pushing data through plain SQL calls does not trigger Carto's own
    processes that populate the_geom / the_geom_webmercator, so we do it
    ourselves as documented at https://gis.stackexchange.com/a/201908

    Args:
        asset_id: when given, only that row is updated. This can
            significantly speed up Carto geofield updates when saving a
            single model instance.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY, base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    # 'longitude' and 'latitude' are the names of the corresponding fields
    # in the CSV file.
    q = f"UPDATE {TABLE_NAME} SET the_geom = ST_SetSRID(st_makepoint(longitude, latitude),4326)"
    if asset_id is not None:
        q += f" WHERE id = {asset_id}"
    results1 = sql.send(q)
    # This takes ~12 seconds for 100,000 rows. The exported CSV file can lag
    # the view on the Carto web site by a minute or two.

    # BUG FIX: this second statement used to be a bare SELECT, which computed
    # the web-mercator geometry but never wrote it back to the table. It must
    # be an UPDATE, and it should honor the same single-row restriction.
    q = (f"UPDATE {TABLE_NAME} SET the_geom_webmercator = "
         f"ST_Transform(ST_SetSRID(st_makepoint(longitude, latitude),4326),3857)")
    if asset_id is not None:
        q += f" WHERE id = {asset_id}"
    results2 = sql.send(q)
    # One possible improvement: replace ST_SetSRID(st_makepoint(lon, lat))
    # with CDB_LatLng(lat, lon), though any performance gain is unverified.
    print(
        f"Tried to add values for the the_geom and the_geom_webmercator fields in {TABLE_NAME}. "
        f"The requests completed in {results1['time']} s and {results2['time']} s."
    )
def clean(self):
    """Validate the configured Carto main table (and the optional remap
    table): each must be reachable and contain the expected columns.
    Returns cleaned_data unchanged on success."""
    data = self.cleaned_data
    domain = data['domain']
    table_name = data['table_name']
    name_col = data['name_col']
    pcode_col = data['pcode_col']
    parent_code_col = data['parent_code_col']
    remap_table_name = data['remap_table_name']

    sql_client = SQLClient(LocationsCartoNoAuthClient(
        base_url="https://{}.carto.com/".format(str(domain))))

    try:
        sites = sql_client.send('select * from {} limit 1'.format(table_name))
    except CartoException:
        logger.exception("CartoDB exception occured")
        raise ValidationError(
            "Couldn't connect to CartoDB table: {}".format(table_name))
    else:
        row = sites['rows'][0]
        # The required columns must all appear in the sampled row.
        for label, column in (('Name', name_col), ('PCode', pcode_col)):
            if column not in row:
                raise ValidationError(
                    'The {} column ({}) is not in table: {}'.format(
                        label, column, table_name))
        if parent_code_col and parent_code_col not in row:
            raise ValidationError(
                'The Parent Code column ({}) is not in table: {}'.format(
                    parent_code_col, table_name))

    if remap_table_name:
        try:
            remap_table = sql_client.send(
                'select * from {} limit 1'.format(remap_table_name))
        except CartoException:  # pragma: no-cover
            logger.exception("CartoDB exception occured")
            raise ValidationError(
                "Couldn't connect to the CartoDB remap table: {}".format(
                    remap_table_name))
        else:
            remap_row = remap_table['rows'][0]
            # Remap tables use fixed column names.
            for label, column in (('Old PCode', 'old_pcode'),
                                  ('New PCode', 'new_pcode')):
                if column not in remap_row.keys():
                    raise ValidationError(
                        'The {} column ({}) is not in table: {}'.format(
                            label, column, remap_table_name))

    return self.cleaned_data
def validate_locations_in_use(carto_table_pk):
    """Check that no in-use Location would be orphaned by a Carto import.

    Compares the p_codes currently in the database against the pcodes found
    in the Carto dataset (plus the old pcodes from the optional remap
    table). Raises NoRemapInUseException when any in-use location has no
    counterpart; returns True otherwise.
    """
    try:
        carto_table = CartoDBTable.objects.get(pk=carto_table_pk)
    except CartoDBTable.DoesNotExist as e:
        logger.exception('Cannot retrieve CartoDBTable with pk: %s', carto_table_pk)
        raise e

    # p_codes of every DB location belonging to this table's location type
    database_pcodes = []
    for row in Location.objects.all_locations().filter(
            gateway=carto_table.location_type).values('p_code'):
        database_pcodes.append(row['p_code'])

    auth_client = LocationsCartoNoAuthClient(
        base_url="https://{}.carto.com/".format(carto_table.domain))
    sql_client = SQLClient(auth_client)

    try:
        # all pcodes in the Carto dataset, aggregated into a single row
        qry = sql_client.send(
            'select array_agg({}) AS aggregated_pcodes from {}'.format(
                carto_table.pcode_col,
                carto_table.table_name,
            ))
        new_carto_pcodes = qry['rows'][0]['aggregated_pcodes'] \
            if len(qry['rows']) > 0 and "aggregated_pcodes" in qry['rows'][0] else []

        remapped_pcode_pairs = []
        if carto_table.remap_table_name:
            remap_qry = 'select old_pcode::text, new_pcode::text from {}'.format(
                carto_table.remap_table_name)
            remapped_pcode_pairs = sql_client.send(remap_qry)['rows']
    except CartoException as e:
        logger.exception(
            "CartoDB exception occured during the data validation.")
        raise e

    remap_old_pcodes = [
        remap_row['old_pcode'] for remap_row in remapped_pcode_pairs
    ]
    # DB pcodes that appear neither in the new dataset nor as a remap source
    orphaned_pcodes = set(database_pcodes) - (set(new_carto_pcodes) | set(remap_old_pcodes))
    orphaned_location_ids = Location.objects.all_locations().filter(
        p_code__in=list(orphaned_pcodes))

    # if location ids with no remap in use are found, do not continue the import
    location_ids_bnriu = get_location_ids_in_use(orphaned_location_ids)
    if location_ids_bnriu:
        msg = "Location ids in use without remap found: {}".format(','.join(
            [str(iu) for iu in location_ids_bnriu]))
        logger.exception(msg)
        raise NoRemapInUseException(msg)

    return True
def get_carto_asset_ids(id_to_check=None):
    """Return the list of asset IDs present in the Carto table.

    When id_to_check is given, the query is restricted to that single ID,
    so the result is either [id_to_check] or [].
    """
    sql = SQLClient(APIKeyAuthClient(api_key=CARTO_API_KEY,
                                     base_url=USR_BASE_URL))
    query = f"SELECT id FROM {TABLE_NAME}"
    if id_to_check is not None:
        query += f" WHERE id = {id_to_check}"
    results = sql.send(query)
    return [record['id'] for record in results['rows']]
def is_public(self, query):
    """Report whether *query* can run through the public (anonymous) client.

    Used to detect public tables in queries in the publication, because
    privacy only works for tables: an EXPLAIN of the query is attempted
    with a public client, and a CartoException means something is private.
    """
    anon_client = _create_auth_client(self.credentials, public=True)
    anon_sql = SQLClient(anon_client)
    try:
        anon_sql.send('EXPLAIN {}'.format(query), do_post=False)
    except CartoException:
        return False
    return True
def create_table(self, table_name=None, cartodbfy=False):
    """Create the Carto table (if missing) with the columns declared in
    self.carto_fields.

    Args:
        table_name: target table; defaults to self.carto_table_name.
        cartodbfy: when True, also run CDB_CartodbfyTable on the new table.
    """
    table_name = table_name or self.carto_table_name
    client = SQLClient(self.carto_auth_client)
    # Renamed the generator variable from `type` to `col_type` so the
    # builtin is not shadowed; the generated SQL is unchanged.
    columns = ",".join(
        "{} {}".format(col_name, col_type)
        for (col_name, col_type) in self.carto_fields)
    client.send(
        "CREATE TABLE IF NOT EXISTS {table_name} ({columns})".format(
            table_name=table_name, columns=columns))
    if cartodbfy is True:
        client.send(
            "SELECT CDB_CartodbfyTable('{schema}', '{table_name}')".format(
                schema=self.carto_auth_client.username,
                table_name=table_name))
class DotCartoFile(object):
    """Represents a .carto file whose datasets can be swapped for others."""

    def __init__(self, dotcarto_file, endpoint_base_url, api_key):
        # BUG FIX: replacements / visualization_id / json used to be mutable
        # class-level attributes, so every instance shared one `replacements`
        # list; they are now per-instance state.
        self.replacements = []
        self.visualization_id = None
        self.json = None
        self.dotcarto_file = dotcarto_file
        self.sql = SQLClient(APIKeyAuthClient(endpoint_base_url, api_key))

    def replace_dataset(self, original_dataset_name, new_dataset_name):
        """Register a dataset replacement to apply when the file is rewritten."""
        self.replacements.append({
            "original_dataset_name": original_dataset_name,
            "new_dataset_name": new_dataset_name
        })

    def replace_datasets_in_dotcarto_file(self, zip_buffer):
        """Apply the registered replacements to self.json and write each new
        dataset into *zip_buffer* as a .gpkg export fetched via SQL."""
        for replacement in self.replacements:
            original_dataset_name = replacement["original_dataset_name"]
            new_dataset_name = replacement["new_dataset_name"]
            self.json = self.json.replace(original_dataset_name,
                                          new_dataset_name)
            new_dataset = self.sql.send(
                "select * from {dataset}".format(
                    dataset=replacement["new_dataset_name"]),
                format="gpkg")
            zip_buffer.writestr(
                join(self.visualization_id,
                     replacement["new_dataset_name"] + ".gpkg"),
                new_dataset)
def clean(self):
    """Validate that the configured Carto table is reachable and contains
    the expected name/pcode (and optional parent code) columns."""
    data = self.cleaned_data
    domain = data['domain']
    table_name = data['table_name']
    name_col = data['name_col']
    pcode_col = data['pcode_col']
    parent_code_col = data['parent_code_col']

    sql_client = SQLClient(EtoolsCartoNoAuthClient(
        base_url="https://{}.carto.com/".format(str(domain))))

    try:
        sites = sql_client.send('select * from {} limit 1'.format(table_name))
    except CartoException:
        logger.exception("CartoDB exception occured")
        raise ValidationError(
            "Couldn't connect to CartoDB table: {}".format(table_name))
    else:
        row = sites['rows'][0]
        # Each configured column must appear in the sampled row.
        for label, column in (('Name', name_col), ('PCode', pcode_col)):
            if column not in row:
                raise ValidationError(
                    'The {} column ({}) is not in table: {}'.format(
                        label, column, table_name))
        if parent_code_col and parent_code_col not in row:
            raise ValidationError(
                'The Parent Code column ({}) is not in table: {}'.format(
                    parent_code_col, table_name))

    return self.cleaned_data
def clean(self):
    """Validate that the named Carto table exists and has the required
    'name' and 'pcode' columns (plus 'parent_pcode' when a parent is set)."""
    data = self.cleaned_data
    domain = data['domain']
    api_key = data['api_key']
    table_name = data['table_name']

    sql = SQLClient(get_carto_client(api_key, domain))
    try:
        sites = sql.send('select * from {} limit 1'.format(table_name))
    except CartoException:
        logging.exception("CartoDB exception occured", exc_info=True)
        raise ValidationError(
            "Couldn't connect to CartoDB table: " + table_name)
    else:
        row = sites['rows'][0]
        required = [('Name', 'name'), ('PCode', 'pcode')]
        if data['parent']:
            required.append(('Parent Code', 'parent_pcode'))
        for label, column in required:
            if column not in row:
                raise ValidationError(
                    'The {} column ({}) is not in table: {}'.format(
                        label, column, table_name))
    return self.cleaned_data
def update_asset_on_carto(asset_dict, fields):
    """Update a single existing asset row in the Carto table.

    Writes every field except 'id' using a plain
    ``UPDATE ... SET ... WHERE id = <asset id>`` statement, e.g.::

        UPDATE election_results SET votes=52, pro=24 WHERE county_id = 1;

    The the_geom/the_geom_webmercator fields are intentionally not touched
    here. (Removed the old commented-out VALUES-tuple experiments that never
    worked.)

    Args:
        asset_dict: dict holding the model under 'asset' plus field values.
        fields: field names to write; must include 'id'.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY, base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    # 'id' identifies the row, so exclude it from the SET clause; deepcopy
    # keeps the caller's `fields` list intact.
    other_fields = copy.deepcopy(fields)
    other_fields.remove('id')
    q = (f"UPDATE {TABLE_NAME} SET {set_string_from_model(asset_dict, other_fields)} "
         f"WHERE id = {asset_dict['asset'].id};")
    # Guard against statements too long for the SQL API.
    # NOTE: assert is stripped when running under `python -O`.
    assert len(q) < 16384
    print(q)
    results = sql.send(q)
def test_sql_unverified_fails_with_auth_client(wrong_onprem_auth_client):
    """SQL over a wrong on-prem auth client must raise CartoException."""
    if wrong_onprem_auth_client is None:
        # Fixture unavailable in this environment; trivially pass.
        assert True is True
        return
    client = SQLClient(wrong_onprem_auth_client)
    with pytest.raises(CartoException):
        client.send('select version()')
def test_sql_additional_params(api_key_auth_client_usr):
    """`skipfields` passed through **kwargs must drop the named column,
    while the same query without it keeps the column."""
    client = SQLClient(api_key_auth_client_usr)
    query = 'select * from ' + EXISTING_POINT_DATASET
    skipped = client.send(query, do_post=True,
                          **{"skipfields": "the_geom_webmercator"})
    assert skipped is not None
    for key in ('rows', 'total_rows', 'time'):
        assert key in skipped
    assert len(skipped['rows']) > 0
    assert "the_geom_webmercator" not in skipped['rows'][0]
    # Without skipfields the column comes back.
    full = client.send(query, do_post=True)
    assert "the_geom_webmercator" in full['rows'][0]
def test_sql_additional_params(api_key_auth_client_usr):
    """Extra request kwargs (skipfields) are forwarded to the SQL API."""
    sql = SQLClient(api_key_auth_client_usr)
    extra = {"skipfields": "the_geom_webmercator"}
    resp = sql.send('select * from ' + EXISTING_POINT_DATASET,
                    do_post=True, **extra)
    assert resp is not None
    assert 'rows' in resp
    assert 'total_rows' in resp
    assert 'time' in resp
    assert len(resp['rows']) > 0
    assert "the_geom_webmercator" not in resp['rows'][0]
    # A plain query (no skipfields) must still include the column.
    resp = sql.send('select * from ' + EXISTING_POINT_DATASET, do_post=True)
    assert "the_geom_webmercator" in resp['rows'][0]
def dumpToCarto(eleList, table_name):
    """Write Strava segment rows into the strava_segments_<table_name> table.

    Each segment is indexable: [0]=cartodb_id, [1]=name, [2]=value,
    [7]=longitude, [8]=latitude. Failures on individual rows are logged and
    skipped so one bad segment does not abort the batch.
    """
    # I am using my CARTO account
    USERNAME = "******"
    USR_BASE_URL = "https://{user}.carto.com/".format(user=USERNAME)
    # FIXME(security): this API key is hard-coded in source; move it to an
    # environment variable / secrets store and rotate the exposed key.
    auth_client = APIKeyAuthClient(
        api_key="53bb19efc968a08f7bdc2c1ffc29c31659240b39",
        base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    table_name = 'strava_segments_' + table_name
    for segment in eleList:
        try:
            # BUG FIX: escape single quotes in the segment name, otherwise a
            # name like "O'Hara" produces invalid SQL.
            safe_name = str(segment[1]).replace("'", "''")
            query = (
                "UPDATE {table} SET cartodb_id={id}, "
                "the_geom=ST_SetSRID(ST_MakePoint({long}, {lat}),4326), "
                "name='{name}', value={value}, date=now() "
                "WHERE cartodb_id={id}"
            ).format(table=table_name, id=segment[0], long=segment[7],
                     lat=segment[8], name=safe_name, value=segment[2])
            logger.info(query)
            sql.send(query)
        except CartoException as e:
            logger.error(e)
def test_sql(api_key_auth_client_usr, mock_requests, do_post=True):
    """A basic SELECT through the mocked transport returns a populated
    result dict with rows/total_rows/time keys."""
    with mock_requests.mocker:
        client = SQLClient(api_key_auth_client_usr)
        result = client.send('select * from ' + EXISTING_POINT_DATASET,
                             do_post=do_post)
        assert result is not None
        for key in ('rows', 'total_rows', 'time'):
            assert key in result
        assert len(result['rows']) > 0
def test_sql_unverified(non_verified_auth_client):
    """`select version()` over a non-verified client still returns rows."""
    if non_verified_auth_client is None:
        # Fixture missing; nothing to verify.
        assert True is True
        return
    client = SQLClient(non_verified_auth_client)
    result = client.send('select version()')
    assert result is not None
    for key in ('rows', 'total_rows', 'time'):
        assert key in result
    assert len(result['rows']) > 0
class DotCartoFile(object):
    """A .carto archive whose referenced datasets can be swapped out and the
    archive rewritten (to disk or to an in-memory buffer)."""

    def __init__(self, dotcarto_file, endpoint_base_url, api_key):
        # BUG FIX: replacements / visualization_id / json used to be mutable
        # class-level attributes, so every instance shared one `replacements`
        # list; they are now per-instance state.
        self.replacements = []
        self.visualization_id = None
        self.json = None
        self.dotcarto_file = dotcarto_file
        self.sql = SQLClient(APIKeyAuthClient(endpoint_base_url, api_key))

    def replace_dataset(self, original_dataset_name, new_dataset_name):
        """Register a dataset replacement to apply in get_new()."""
        self.replacements.append({
            "original_dataset_name": original_dataset_name,
            "new_dataset_name": new_dataset_name
        })

    def replace_datasets_in_dotcarto_file(self, zip_buffer):
        """Apply the registered replacements to self.json and write each new
        dataset into *zip_buffer* as a .gpkg export fetched via SQL."""
        for replacement in self.replacements:
            original_dataset_name = replacement["original_dataset_name"]
            new_dataset_name = replacement["new_dataset_name"]
            self.json = self.json.replace(original_dataset_name,
                                          new_dataset_name)
            new_dataset = self.sql.send(
                "select * from {dataset}".format(
                    dataset=replacement["new_dataset_name"]),
                format="gpkg")
            zip_buffer.writestr(
                join(self.visualization_id,
                     replacement["new_dataset_name"] + ".gpkg"),
                new_dataset)

    def get_new(self, destination_path=None):
        """Build a new .carto file with the registered dataset replacements.

        Writes to *destination_path* when given; otherwise returns an
        in-memory buffer positioned at offset 0.
        """
        with ZipFile(self.dotcarto_file) as original_dotcarto_file:
            # The first entry is the visualization directory; strip the
            # trailing slash to get its id.
            self.visualization_id = original_dotcarto_file.namelist()[0][:-1]
            json_file_relative_path = join(
                self.visualization_id,
                self.visualization_id + ".carto.json")
            self.json = original_dotcarto_file.read(json_file_relative_path)
            if destination_path is not None:
                with ZipFile(destination_path, mode='w') as new_dotcarto_file:
                    self.replace_datasets_in_dotcarto_file(new_dotcarto_file)
                    new_dotcarto_file.writestr(json_file_relative_path,
                                               self.json.encode('utf-8'))
            else:
                # NOTE(review): StringIO + ZipFile only works on Python 2;
                # under Python 3 this needs io.BytesIO — confirm the target
                # runtime before changing it.
                new_dotcarto_buffer = StringIO()
                with ZipFile(new_dotcarto_buffer, mode='w') as new_dotcarto_file:
                    self.replace_datasets_in_dotcarto_file(new_dotcarto_file)
                    new_dotcarto_file.writestr(json_file_relative_path,
                                               self.json.encode('utf-8'))
                new_dotcarto_buffer.seek(0)
                return new_dotcarto_buffer
def query(self, sql_query, parse_json=True, do_post=True, format=None,
          write_qry=False):
    """Run *sql_query* against Carto and return the result rows.

    Write statements are rejected unless write_qry=True. Any failure is
    logged together with the query and re-raised as CartoModelException.
    """
    try:
        if not write_qry and self.__is_write_query(sql_query):
            raise CartoModelException("Aborted query. No write queries allowed.")
        auth = self.__get_auth_client(self.__carto_api_key, self.__carto_user)
        response = SQLClient(auth, api_version='v2').send(
            sql_query, parse_json, do_post, format)
        return response['rows']
    except Exception as err:
        self.__logger.error(
            "Error sending query to Carto: {0}\n{1}".format(err, sql_query))
        raise CartoModelException(err)
def get_carto_username(self):
    """
    Returns the user name for the client passed

    Returns:
        String with CARTO account name
    Raises:
        CartoException: an error thrown by the CARTO request
        Exception: some error in the client happened
    """
    self.logger.debug("Getting the CARTO user name...")
    response = SQLClient(self.auth_client).send("SELECT CDB_Username()")
    self.logger.debug(response)
    # Exactly one row is expected; anything else means the client is bad.
    if "rows" in response and len(response["rows"]) == 1:
        return response["rows"][0]["cdb_username"]
    raise Exception("Your client is not valid")
def query(sql_query, parse_json=True, do_post=True, format=None, retries=5):
    """Send *sql_query* to Carto, retrying up to *retries* times on
    CartoException with a 5 s pause between attempts.

    Returns res["rows"] when format is None, the raw response otherwise.
    """
    log.debug(f"Query: {sql_query}")
    client = SQLClient(_get_auth_client(), api_version="v2")
    res = None
    attempt = 0
    while attempt < retries:
        try:
            res = client.send(sql_query, parse_json, do_post, format)
            if res:
                break
        except CartoException as carto_exception:
            # Out of attempts: surface the last error.
            if attempt == retries - 1:
                raise carto_exception
            time.sleep(5)
        attempt += 1
    return res["rows"] if format is None else res
def insert_new_assets_into_carto(asset_dicts, fields):
    """Batch-insert new asset records into the Carto table.

    Batch inserts take the form::

        INSERT INTO t (a, b) VALUES (1, 2), (3, 4);

    the_geom / the_geom_webmercator are inserted as NULL here; they are
    populated separately. (Removed the old commented-out single-insert
    examples.)

    Args:
        asset_dicts: list of dicts of field values; mutated in place to add
            NULL geometry fields.
        fields: ordered field names common to every dict.
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY, base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    extra_fields = ['the_geom', 'the_geom_webmercator']
    fields_extended = fields + extra_fields
    for a_dict in asset_dicts:
        for f in extra_fields:
            a_dict[f] = None
    # Map each record into one "(v1, v2, ...)" values-tuple string.
    values_tuple_strings = [
        batch_values_string_from_model(a_dict, fields_extended)
        for a_dict in asset_dicts
    ]
    q = f"INSERT INTO {TABLE_NAME} ({', '.join(fields_extended)}) " \
        f"VALUES {', '.join(values_tuple_strings)};"
    # Guard against statements too long for the SQL API.
    # NOTE: assert is stripped when running under `python -O`.
    assert len(q) < 16384
    print(q)
    results = sql.send(q)
def cleanup_obsolete_locations(carto_table_pk):
    """Delete Location rows whose p_code no longer appears in the Carto
    dataset in any form (new pcodes, or either side of a remap pair), then
    rebuild the location tree.
    """
    try:
        carto_table = CartoDBTable.objects.get(pk=carto_table_pk)
    except CartoDBTable.DoesNotExist as e:
        logger.exception('Cannot retrieve CartoDBTable with pk: %s', carto_table_pk)
        raise e

    # p_codes of every DB location belonging to this table's location type
    database_pcodes = []
    for row in Location.objects.all_locations().filter(
            gateway=carto_table.location_type).values('p_code'):
        database_pcodes.append(row['p_code'])

    auth_client = LocationsCartoNoAuthClient(
        base_url="https://{}.carto.com/".format(carto_table.domain))
    sql_client = SQLClient(auth_client)

    try:
        # all pcodes in the Carto dataset, aggregated into a single row
        qry = sql_client.send(
            'select array_agg({}) AS aggregated_pcodes from {}'.format(
                carto_table.pcode_col,
                carto_table.table_name,
            ))
        new_carto_pcodes = qry['rows'][0]['aggregated_pcodes'] \
            if len(qry['rows']) > 0 and "aggregated_pcodes" in qry['rows'][0] else []

        remapped_pcode_pairs = []
        if carto_table.remap_table_name:
            remap_qry = 'select old_pcode::text, new_pcode::text from {}'.format(
                carto_table.remap_table_name)
            remapped_pcode_pairs = sql_client.send(remap_qry)['rows']
    except CartoException as e:
        logger.exception(
            "CartoDB exception occured during the data validation.")
        raise e

    # both the old and the new pcode of a remap pair count as "still present"
    remapped_pcodes = [
        remap_row['old_pcode'] for remap_row in remapped_pcode_pairs
    ]
    remapped_pcodes += [
        remap_row['new_pcode'] for remap_row in remapped_pcode_pairs
    ]
    # select for deletion those pcodes which are not present in the Carto datasets in any form
    deleteable_pcodes = set(database_pcodes) - (set(new_carto_pcodes) | set(remapped_pcodes))

    # Do a few safety checks before we actually delete a location, like:
    # - ensure that the deleted locations doesn't have any children in the location tree
    # - check if the deleted location was remapped before, do not delete if yes.
    # if the checks pass, add the deleteable location ID to the `revalidated_deleteable_pcodes` array so they can be
    # deleted in one go later
    revalidated_deleteable_pcodes = []

    with transaction.atomic():
        # prevent writing into locations until the cleanup is done
        # NOTE(review): this queryset is never evaluated, so the
        # SELECT ... FOR UPDATE may not actually execute — confirm.
        Location.objects.all_locations().select_for_update().only('id')

        for deleteable_pcode in deleteable_pcodes:
            try:
                deleteable_location = Location.objects.all_locations().get(
                    p_code=deleteable_pcode)
            except Location.DoesNotExist:
                logger.warning(
                    "Cannot find orphaned pcode {}.".format(deleteable_pcode))
            else:
                if deleteable_location.is_leaf_node():
                    secondary_parent_check = Location.objects.all_locations(
                    ).filter(parent=deleteable_location.id).exists()
                    remap_history_check = LocationRemapHistory.objects.filter(
                        Q(old_location=deleteable_location) |
                        Q(new_location=deleteable_location)).exists()
                    if not secondary_parent_check and not remap_history_check:
                        logger.info(
                            "Selecting orphaned pcode {} for deletion".format(
                                deleteable_location.p_code))
                        revalidated_deleteable_pcodes.append(
                            deleteable_location.id)

        # delete the selected locations all at once, it seems it's faster like this compared to deleting them one by one.
        if revalidated_deleteable_pcodes:
            logger.info("Deleting selected orphaned pcodes")
            Location.objects.all_locations().filter(
                id__in=revalidated_deleteable_pcodes).delete()

        # rebuild location tree after the unneeded locations are cleaned up, because it seems deleting locations
        # sometimes leaves the location tree in a `bugged` state
        Location.objects.rebuild()
def test_sql_error(api_key_auth_client_usr):
    """Querying a nonexistent dataset must raise CartoException."""
    client = SQLClient(api_key_auth_client_usr)
    with pytest.raises(CartoException):
        client.send('select * from non_existing_dataset')
help='Set the base URL. For example:' + ' https://username.carto.com/ ' + '(defaults to env variable CARTO_API_URL)') parser.add_argument('--api_key', dest='CARTO_API_KEY', default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '', help='Api key of the account' + ' (defaults to env variable CARTO_API_KEY)') args = parser.parse_args() if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization: auth_client = APIKeyAuthClient( args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) else: logger.error('You need to provide valid credentials, run with -h parameter for details') import sys sys.exit(1) # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) queries = "SELECT pg_cancel_backend('" + args.pid + \ "') from pg_stat_activity where usename=current_user;" try: sql.send(queries) logger.info('Query killed') except: logger.warn('Something went wrong')
# Script tail: parse CLI credentials, then cancel the given PostgreSQL
# backend PID on the user's Carto database.
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY']
                    if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                         ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY,
                                   args.organization)
else:
    logger.error(
        'You need to provide valid credentials, run with -h parameter for details'
    )
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

queries = "SELECT pg_cancel_backend('" + args.pid + \
    "') from pg_stat_activity where usename=current_user;"

try:
    sql.send(queries)
    logger.info('Query killed')
except Exception:
    # BUG FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; narrowed to Exception. Also switched the
    # deprecated logger.warn alias to logger.warning.
    logger.warning('Something went wrong')
table_user.align['Attribute'] = 'l' table_user.align['Value'] = 'l' for row in userInfo: table_user.add_row(row) print(table_user) # print('{name}: {value}').format(name=i,value=str(user.__dict__[i])) except Exception as e: logger.warn('User has no admin of its organization') # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) # show quota of user results = [] print('\nThe quotas of the user are:\n') quota = sql.send( "SELECT * FROM cdb_dataservices_client.cdb_service_quota_info()") for k, v in quota.items(): if k == 'rows': for itr in v: results.append([ itr['service'], itr['used_quota'], itr['provider'], itr['soft_limit'], itr['monthly_quota'] ]) table = PrettyTable( ['Service', 'Provider', 'Soft limit', 'Used quota', 'Monthly quota']) table.align['Used quota'] = 'l' table.align['Provider'] = 'r' table.align['Soft limit'] = 'r' table.align['Service'] = 'r' table.align['Monthly quota'] = 'r'
i.table.name, i.table.row_count, str(round(i.table.size/1048576.00, 2)), str(i.table.privacy), str(i.table.geometry_types) ]) print(table_general) columns_table = "select column_name, data_type FROM information_schema.columns \ WHERE table_schema = '" + i.permission.owner.username + "'\ AND table_name ='" + i.table.name + "';" # print columns_table print('\nThe columns and their data types are: \n') columnAndTypes = sql.send(columns_table) for key, value in columnAndTypes.items(): if key == 'rows': for itr in value: results_col.append([ itr['column_name'], itr['data_type'] ]) table_col = PrettyTable( ['Column name', 'Data type']) table_col.align['Column name'] = 'l' table_col.align['Data type'] = 'r' for row in results_col: table_col.add_row(row) print(table_col)
class TestCartoContext(unittest.TestCase): """Tests for cartoframes.CartoContext""" def setUp(self): if (os.environ.get('APIKEY') is None or os.environ.get('USERNAME') is None): try: creds = json.loads(open('test/secret.json').read()) self.apikey = creds['APIKEY'] self.username = creds['USERNAME'] except: # noqa: E722 warnings.warn("Skipping CartoContext tests. To test it, " "create a `secret.json` file in test/ by " "renaming `secret.json.sample` to `secret.json` " "and updating the credentials to match your " "environment.") self.apikey = None self.username = None else: self.apikey = os.environ['APIKEY'] self.username = os.environ['USERNAME'] if self.username and self.apikey: self.baseurl = 'https://{username}.carto.com/'.format( username=self.username) self.auth_client = APIKeyAuthClient(base_url=self.baseurl, api_key=self.apikey) self.sql_client = SQLClient(self.auth_client) # sets client to be ci if not cartoframes.context.DEFAULT_SQL_ARGS['client']\ .endswith('_dev_ci'): cartoframes.context.DEFAULT_SQL_ARGS['client'] += '_dev_ci' # sets skip value WILL_SKIP = self.apikey is None or self.username is None # noqa: F841 # table naming info has_mpl = 'mpl' if os.environ.get('MPLBACKEND') else 'nonmpl' pyver = sys.version[0:3].replace('.', '_') # test tables self.test_read_table = 'cb_2013_us_csa_500k' self.valid_columns = set([ 'affgeoid', 'aland', 'awater', 'created_at', 'csafp', 'geoid', 'lsad', 'name', 'the_geom', 'updated_at' ]) table_args = dict(ver=pyver, mpl=has_mpl) # torque table self.test_point_table = 'tweets_obama' # for writing to carto self.test_write_table = 'cartoframes_test_table_{ver}_{mpl}'.format( **table_args) self.mixed_case_table = 'AbCdEfG_{0}_{1}'.format(pyver, has_mpl) # for batch writing to carto self.test_write_batch_table = ( 'cartoframes_test_batch_table_{ver}_{mpl}'.format(**table_args)) self.test_write_lnglat_table = ( 'cartoframes_test_write_lnglat_table_{ver}_{mpl}'.format( **table_args)) self.write_named_index = ( 
'cartoframes_test_write_non_default_index_{ver}_{mpl}'.format( **table_args)) # for queries self.test_query_table = ('cartoframes_test_query_' 'table_{ver}_{mpl}'.format(**table_args)) self.test_delete_table = ('cartoframes_test_delete_' 'table_{ver}_{mpl}').format(**table_args) # for data observatory self.test_data_table = 'carto_usa_offices' def tearDown(self): """restore to original state""" tables = ( self.test_write_table, self.test_write_batch_table, self.test_write_lnglat_table, self.test_query_table, self.mixed_case_table.lower(), self.write_named_index, ) sql_drop = 'DROP TABLE IF EXISTS {};' if self.apikey and self.baseurl: cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) for table in tables: cc.delete(table) cc.sql_client.send(sql_drop.format(table)) # TODO: remove the named map templates def add_map_template(self): """Add generated named map templates to class""" pass @unittest.skipIf(WILL_SKIP, 'Skipping test, no carto credentials found') def test_cartocontext(self): """context.CartoContext.__init__ normal usage""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) self.assertEqual(cc.creds.key(), self.apikey) self.assertEqual(cc.creds.base_url(), self.baseurl.strip('/')) self.assertEqual(cc.creds.username(), self.username) self.assertTrue(not cc.is_org) with self.assertRaises(CartoException): cartoframes.CartoContext(base_url=self.baseurl, api_key='notavalidkey') @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_credentials(self): """context.CartoContext.__init__ Credentials argument""" creds = cartoframes.Credentials(username=self.username, key=self.apikey) cc = cartoframes.CartoContext(creds=creds) self.assertIsInstance(cc, cartoframes.CartoContext) self.assertEqual(cc.creds.username(), self.username) self.assertEqual(cc.creds.key(), self.apikey) # CartoContext pulls from saved credentials saved_creds = cartoframes.Credentials(username=self.username, 
key=self.apikey) saved_creds.save() cc_saved = cartoframes.CartoContext() self.assertEqual(cc_saved.creds.key(), self.apikey) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_isorguser(self): """context.CartoContext._is_org_user""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) self.assertTrue(not cc._is_org_user()) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_read(self): """context.CartoContext.read""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) # fails if limit is smaller than zero with self.assertRaises(ValueError): df = cc.read('sea_horses', limit=-10) # fails if not an int with self.assertRaises(ValueError): df = cc.read('sea_horses', limit=3.14159) with self.assertRaises(ValueError): df = cc.read('sea_horses', limit='acadia') # fails on non-existent table with self.assertRaises(CartoException): df = cc.read('non_existent_table') # normal table df = cc.read(self.test_read_table) self.assertSetEqual(set(df.columns), self.valid_columns) self.assertTrue(len(df) == 169) # read with limit df = cc.read(self.test_read_table, limit=10) self.assertEqual(len(df), 10) self.assertIsInstance(df, pd.DataFrame) # read empty table/dataframe df = cc.read(self.test_read_table, limit=0) self.assertSetEqual(set(df.columns), self.valid_columns) self.assertEqual(len(df), 0) self.assertIsInstance(df, pd.DataFrame) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_write(self): """context.CartoContext.write normal usage""" from cartoframes.context import MAX_ROWS_LNGLAT cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) data = { 'nums': list(range(100, 0, -1)), 'category': [random.choice('abcdefghijklmnop') for _ in range(100)], 'lat': [0.01 * i for i in range(100)], 'long': [-0.01 * i for i in range(100)] } schema = { 'nums': int, 'category': 'object', 'lat': float, 
'long': float } df = pd.DataFrame(data).astype(schema) cc.write(df, self.test_write_table) # check if table exists resp = self.sql_client.send(''' SELECT * FROM {table} LIMIT 0 '''.format(table=self.test_write_table)) self.assertIsNotNone(resp) # check that table has same number of rows resp = self.sql_client.send(''' SELECT count(*) FROM {table}'''.format(table=self.test_write_table)) self.assertEqual(resp['rows'][0]['count'], len(df)) # should error for existing table with self.assertRaises(NameError): cc.write(df, self.test_read_table, overwrite=False) # overwrite table and create the_geom column cc.write(df, self.test_write_table, overwrite=True, lnglat=('long', 'lat')) resp = self.sql_client.send(''' SELECT count(*) AS num_rows, count(the_geom) AS num_geoms FROM {table} '''.format(table=self.test_write_table)) # number of geoms should equal number of rows self.assertEqual(resp['rows'][0]['num_rows'], resp['rows'][0]['num_geoms']) # test batch lnglat behavior n_rows = MAX_ROWS_LNGLAT + 1 df = pd.DataFrame({ 'latvals': [random.random() for r in range(n_rows)], 'lngvals': [random.random() for r in range(n_rows)] }) job = cc.write(df, self.test_write_lnglat_table, lnglat=('lngvals', 'latvals')) self.assertIsInstance(job, cartoframes.context.BatchJobStatus) # test batch writes n_rows = 550000 df = pd.DataFrame({'vals': [random.random() for r in range(n_rows)]}) cc.write(df, self.test_write_batch_table) resp = self.sql_client.send(''' SELECT count(*) AS num_rows FROM {table} '''.format(table=self.test_write_batch_table)) # number of rows same in dataframe and carto table self.assertEqual(n_rows, resp['rows'][0]['num_rows']) cols = self.sql_client.send(''' SELECT * FROM {table} LIMIT 1 '''.format(table=self.test_write_batch_table)) expected_schema = { 'vals': { 'type': 'number' }, 'the_geom': { 'type': 'geometry' }, 'the_geom_webmercator': { 'type': 'geometry' }, 'cartodb_id': { 'type': 'number' } } # table should be properly created # util columns + new column of 
type number self.assertDictEqual(cols['fields'], expected_schema) # test properly encoding df = pd.DataFrame({'vals': [1, 2], 'strings': ['a', 'ô']}) cc.write(df, self.test_write_table, overwrite=True) # check if table exists resp = self.sql_client.send(''' SELECT * FROM {table} LIMIT 0 '''.format(table=self.test_write_table)) self.assertIsNotNone(resp) cc.delete(self.test_write_table) df = pd.DataFrame({'vals': list('abcd'), 'ids': list('wxyz')}) df = df.astype({'vals': str, 'ids': str}) cc.write(df, self.test_write_table) schema = cc.sql_client.send('select ids, vals from {}'.format( self.test_write_table))['fields'] self.assertSetEqual(set([schema[c]['type'] for c in schema]), set(('string', ))) df = pd.DataFrame({ 'vals': list('abcd'), 'ids': list('wxyz'), 'nums': [1.2 * i for i in range(4)], 'boolvals': [ True, False, None, True, ], }) cc.write(df, self.test_write_table, overwrite=True, type_guessing='true') resp = cc.sql_client.send('SELECT * FROM {}'.format( self.test_write_table))['fields'] schema = {k: v['type'] for k, v in dict_items(resp)} ans = dict(vals='string', ids='string', nums='number', boolvals='boolean', the_geom='geometry', the_geom_webmercator='geometry', cartodb_id='number') self.assertDictEqual(schema, ans) @unittest.skipIf(WILL_SKIP, 'updates privacy of existing dataset') def test_write_privacy(self): """context.CartoContext.write Updates the privacy of a dataset""" from carto.datasets import DatasetManager cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) ds_manager = DatasetManager(self.auth_client) df = pd.DataFrame({'ids': list('abcd'), 'vals': range(4)}) cc.write(df, self.test_write_table) dataset = ds_manager.get(self.test_write_table) self.assertEqual(dataset.privacy.lower(), 'private') df = pd.DataFrame({'ids': list('efgh'), 'vals': range(4, 8)}) cc.write(df, self.test_write_table, overwrite=True, privacy='public') dataset = ds_manager.get(self.test_write_table) self.assertEqual(dataset.privacy.lower(), 
'public') privacy = cc._get_privacy('i_am_not_a_table_in_this_account') self.assertIsNone(privacy) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping') def test_cartocontext_write_index(self): """context.CartoContext.write with non-default index""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) df = pd.DataFrame({ 'vals': range(3), 'ids': list('abc') }, index=list('xyz')) df.index.name = 'named_index' cc.write(df, self.write_named_index) df_index = cc.read(self.write_named_index) self.assertSetEqual(set(('the_geom', 'vals', 'ids', 'named_index')), set(df_index.columns)) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping') def test_cartocontext_mixed_case(self): """context.CartoContext.write table name mixed case""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) data = pd.DataFrame({'a': [1, 2, 3], 'B': list('abc')}) cc.write(pd.DataFrame(data), self.mixed_case_table) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping') def test_cartocontext_table_exists(self): """context.CartoContext._table_exists""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) self.assertFalse(cc._table_exists('acadia_biodiversity')) with self.assertRaises(NameError): cc._table_exists(self.test_read_table) def test_cartocontext_delete(self): """context.CartoContext.delete""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) data = {'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']} df = pd.DataFrame(data) cc.write(df, self.test_delete_table) cc.delete(self.test_delete_table) # check that querying recently deleted table raises an exception with self.assertRaises(CartoException): cc.sql_client.send('select * from {}'.format( self.test_delete_table)) # try to delete a table that does not exists with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") # Trigger a warning. 
cc.delete('non_existent_table') # Verify one warning, subclass is UserWarning, and expected message # is in warning assert len(w) == 1 assert issubclass(w[-1].category, UserWarning) assert "Failed to delete" in str(w[-1].message) def test_cartocontext_send_dataframe(self): """context.CartoContext._send_dataframe""" pass def test_cartocontext_handle_import(self): """context.CartoContext._handle_import""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) import_failures = ( dict(error_code=8001, state='failure'), dict(error_code=6668, state='failure'), dict(error_code=1234, state='failure'), ) for import_job in import_failures: with self.assertRaises(CartoException): cc._handle_import(import_job, 'foo') diff_table_err = dict(state='complete', table_name='bar') with self.assertRaises(Exception): cc._handle_import(diff_table_err, 'foo') @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping') def test_cartoframes_sync(self): """context.CartoContext.sync""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) self.assertIsNone(cc.sync(pd.DataFrame(), 'acadia')) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping') def test_cartoframes_query(self): """context.CartoContext.query""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) cols = ( 'link', 'body', 'displayname', 'friendscount', 'postedtime', ) df = cc.query(''' SELECT {cols}, '02-06-1429'::date as invalid_df_date FROM tweets_obama LIMIT 100 '''.format(cols=','.join(cols))) # ensure columns are in expected order df = df[list(cols) + ['invalid_df_date']] # same number of rows self.assertEqual(len(df), 100, msg='Expected number or rows') # same type of object self.assertIsInstance(df, pd.DataFrame, 'Should be a pandas DataFrame') # same column names requested_cols = { 'link', 'body', 'displayname', 'friendscount', 'postedtime', 'invalid_df_date', } self.assertSetEqual(requested_cols, set(df.columns), msg='Should have the columns 
requested') # should have exected schema expected_dtypes = ( 'object', 'object', 'object', 'float64', 'datetime64[ns]', 'object', ) self.assertTupleEqual(expected_dtypes, tuple(str(d) for d in df.dtypes), msg='Should have expected schema') # empty response df_empty = cc.query(''' SELECT 1 LIMIT 0 ''') # no rows, one column self.assertTupleEqual(df_empty.shape, (0, 1)) # is a DataFrame self.assertIsInstance(df_empty, pd.DataFrame) # table already exists, should throw CartoException with self.assertRaises(CartoException): cc.query(''' SELECT link, body, displayname, friendscount FROM tweets_obama LIMIT 100 ''', table_name='tweets_obama') # create a table from a query cc.query(''' SELECT link, body, displayname, friendscount FROM tweets_obama LIMIT 100 ''', table_name=self.test_query_table) # read newly created table into a dataframe df = cc.read(self.test_query_table) # should be specified length self.assertEqual(len(df), 100) # should have requested columns + utility columns from CARTO self.assertSetEqual( { 'link', 'body', 'displayname', 'friendscount', 'the_geom', }, set(df.columns), msg='Should have the columns requested') # see what happens if a query fails after 100 successful rows with self.assertRaises(CartoException): cc.query(''' WITH cte AS ( SELECT CDB_LatLng(0, 0) as the_geom, i FROM generate_series(1, 110) as m(i) UNION ALL SELECT ST_Buffer(CDB_LatLng(0, 0), 0.1) as the_geom, i FROM generate_series(111, 120) as i ) SELECT ST_X(the_geom) as xval, ST_Y(the_geom) as yval FROM cte ''') @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_map(self): """context.CartoContext.map normal usage""" from cartoframes import Layer, QueryLayer, BaseMap try: import matplotlib matplotlib.use('agg') import matplotlib.pyplot as plt except ImportError: plt = None cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) # test with no layers - should produce basemap if plt: basemap_only_static_mpl = 
cc.map(interactive=False) cartoframes.context.HAS_MATPLOTLIB = False basemap_only_static = cc.map(interactive=False) basemap_only_interactive = cc.map(interactive=True) # are of the correct type instances if plt: self.assertIsInstance(basemap_only_static_mpl, plt.Axes) self.assertIsInstance(basemap_only_static, IPython.core.display.Image) self.assertIsInstance(basemap_only_interactive, IPython.core.display.HTML) # have the HTML innards that are to be expected if sys.version[0] == 3: self.assertRegex(basemap_only_static.data, ('^<img src="https://.*api/v1/map/static/named/' 'cartoframes_ver.*" />$')) self.assertRegex(basemap_only_interactive.data, '^<iframe srcdoc="<!DOCTYPE html>.*') elif sys.version[0] == 2: self.assertRegexMatches( basemap_only_static.data, ('^<img src="https://.*api/v1/map/static/named/' 'cartoframes_ver.*" />$')) self.assertRegexMatches(basemap_only_interactive.data, '^<iframe srcdoc="<!DOCTYPE html>.*') # test with labels on front labels_front = cc.map(layers=BaseMap('light', labels='front')) self.assertIsInstance(labels_front, IPython.core.display.HTML) # test with one Layer one_layer = cc.map(layers=Layer('tweets_obama')) self.assertIsInstance(one_layer, IPython.core.display.HTML) # test with two Layers two_layers = cc.map( layers=[Layer('tweets_obama'), Layer(self.test_read_table)]) self.assertIsInstance(two_layers, IPython.core.display.HTML) # test with one Layer, one QueryLayer onelayer_onequery = cc.map(layers=[ QueryLayer(''' SELECT * FROM tweets_obama LIMIT 100'''), Layer(self.test_read_table) ]) self.assertIsInstance(onelayer_onequery, IPython.core.display.HTML) # test with BaseMap, Layer, QueryLayer cc.map(layers=[ BaseMap('light'), QueryLayer(''' SELECT * FROM tweets_obama LIMIT 100''', color='favoritescount'), Layer(self.test_read_table) ]) # Errors # too many layers with self.assertRaises(ValueError): layers = [Layer('tweets_obama')] * 9 cc.map(layers=layers) # zoom needs to be specified with lng/lat with 
self.assertRaises(ValueError): cc.map(lng=44.3386, lat=68.2733) # only one basemap layer can be added with self.assertRaises(ValueError): cc.map(layers=[BaseMap('dark'), BaseMap('light')]) # only one time layer can be added with self.assertRaises(ValueError): cc.map(layers=[ Layer(self.test_read_table, time='cartodb_id'), Layer(self.test_read_table, time='cartodb_id') ]) # no geometry with self.assertRaises(ValueError): cc.map(layers=QueryLayer(''' SELECT null::geometry as the_geom, null::geometry as the_geom_webmercator, row_number() OVER () as cartodb_id FROM generate_series(1, 10) as m(i) ''')) @unittest.skipIf(WILL_SKIP, 'no cartocredentials, skipping') def test_cartocontext_map_time(self): """context.CartoContext.map time options""" from cartoframes import Layer cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) html_map = cc.map( layers=Layer(self.test_point_table, time='cartodb_id')) self.assertIsInstance(html_map, IPython.core.display.HTML) # category map cat_map = cc.map(layers=Layer(self.test_point_table, time='actor_postedtime', color='twitter_lang')) self.assertRegexpMatches(cat_map.__html__(), '.*CDB_Math_Mode\(cf_value_twitter_lang\).*') with self.assertRaises( ValueError, msg='cannot create static torque maps currently'): cc.map(layers=Layer(self.test_point_table, time='cartodb_id'), interactive=False) with self.assertRaises(ValueError, msg='cannot have more than one torque layer'): cc.map(layers=[ Layer(self.test_point_table, time='cartodb_id'), Layer(self.test_point_table, color='cartodb_id') ]) with self.assertRaises( ValueError, msg='cannot do a torque map off a polygon dataset'): cc.map(layers=Layer(self.test_read_table, time='cartodb_id')) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_map_geom_type(self): """context.CartoContext.map basemap geometry type defaults""" from cartoframes import Layer, QueryLayer cc = cartoframes.CartoContext(base_url=self.baseurl, 
api_key=self.apikey) # baseid1 = dark, labels1 = labels on top in named map name labels_polygon = cc.map(layers=Layer(self.test_read_table)) self.assertRegexpMatches(labels_polygon.__html__(), '.*baseid2_labels1.*', msg='labels should be on top since only a ' 'polygon layer is present') # baseid2 = voyager, labels0 = labels on bottom labels_point = cc.map(layers=Layer(self.test_point_table)) self.assertRegexpMatches(labels_point.__html__(), '.*baseid2_labels0.*', msg='labels should be on bottom because a ' 'point layer is present') labels_multi = cc.map( layers=[Layer(self.test_point_table), Layer(self.test_read_table)]) self.assertRegexpMatches(labels_multi.__html__(), '.*baseid2_labels0.*', msg='labels should be on bottom because a ' 'point layer is present') # create a layer with points and polys, but with more polys # should default to poly layer (labels on top) multi_geom_layer = QueryLayer(''' (SELECT the_geom, the_geom_webmercator, row_number() OVER () AS cartodb_id FROM "{polys}" WHERE the_geom IS NOT null LIMIT 10) UNION ALL (SELECT the_geom, the_geom_webmercator, (row_number() OVER ()) + 10 AS cartodb_id FROM "{points}" WHERE the_geom IS NOT null LIMIT 5) '''.format(polys=self.test_read_table, points=self.test_point_table)) multi_geom = cc.map(layers=multi_geom_layer) self.assertRegexpMatches(multi_geom.__html__(), '.*baseid2_labels1.*', msg='layer has more polys than points, so it ' 'should default to polys labels (on top)') @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping') def test_get_bounds(self): """context.CartoContext._get_bounds""" from cartoframes.layer import QueryLayer cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) vals1 = {'minx': 0, 'maxx': 1, 'miny': 0, 'maxy': 2} vals2 = {'minx': 0, 'maxx': 1.5, 'miny': -0.5, 'maxy': 1.5} ans = {'west': 0, 'east': 1.5, 'south': -0.5, 'north': 2} # (MINX, MINY), (MINX, MAXY), (MAXX, MAXY), (MAXX, MINY), (MINX, MINY) # https://postgis.net/docs/ST_Envelope.html query = 
''' WITH cte AS ( SELECT 'SRID=4326;POLYGON(({minx} {miny}, {minx} {maxy}, {maxx} {maxy}, {maxx} {miny}, {minx} {miny}))'::geometry AS the_geom ) SELECT 1 AS cartodb_id, the_geom, ST_Transform(the_geom, 3857) AS the_geom_webmercator FROM cte ''' layers = [ QueryLayer(query.format(**vals1)), QueryLayer(query.format(**vals2)) ] extent_ans = cc._get_bounds(layers) self.assertDictEqual(extent_ans, ans) @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test') def test_cartocontext_check_query(self): """context.CartoContext._check_query""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) # this table does not exist in this account fail_query = ''' SELECT * FROM cyclists ''' fail_cols = ['merckx', 'moser', 'gimondi'] with self.assertRaises(ValueError): cc._check_query(fail_query, style_cols=fail_cols) # table exists success_query = ''' SELECT * FROM {} '''.format(self.test_read_table) self.assertIsNone(cc._check_query(success_query)) # table exists but columns don't with self.assertRaises(ValueError): cc._check_query(success_query, style_cols=fail_cols) def test_df2pg_schema(self): """context._df2pg_schema""" from cartoframes.context import _df2pg_schema data = [{ 'id': 'a', 'val': 1.1, 'truth': True, 'idnum': 1 }, { 'id': 'b', 'val': 2.2, 'truth': True, 'idnum': 2 }, { 'id': 'c', 'val': 3.3, 'truth': False, 'idnum': 3 }] df = pd.DataFrame(data).astype({ 'id': 'object', 'val': float, 'truth': bool, 'idnum': int }) # specify order of columns df = df[['id', 'val', 'truth', 'idnum']] pgcols = ['id', 'val', 'truth', 'idnum'] ans = ('NULLIF("id", \'\')::text AS id, ' 'NULLIF("val", \'\')::numeric AS val, ' 'NULLIF("truth", \'\')::boolean AS truth, ' 'NULLIF("idnum", \'\')::numeric AS idnum') self.assertEqual(ans, _df2pg_schema(df, pgcols)) # add the_geom df['the_geom'] = 'Point(0 0)' ans = '\"the_geom\", ' + ans pgcols.append('the_geom') self.assertEqual(ans, _df2pg_schema(df, pgcols)) @unittest.skipIf(WILL_SKIP, 'no carto credentials, 
skipping this test') def test_add_encoded_geom(self): """context._add_encoded_geom""" from cartoframes.context import _add_encoded_geom, _encode_geom cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) # encode_geom=True adds a column called 'geometry' df = cc.read(self.test_read_table, limit=5, decode_geom=True) # alter the geometry df['geometry'] = df['geometry'].apply(lambda x: x.buffer(0.1)) # the_geom should reflect encoded 'geometry' column _add_encoded_geom(df, 'geometry') # geometry column should equal the_geom after function call self.assertTrue(df['the_geom'].equals( df['geometry'].apply(_encode_geom))) # don't specify geometry column (should exist since decode_geom==True) df = cc.read(self.test_read_table, limit=5, decode_geom=True) df['geometry'] = df['geometry'].apply(lambda x: x.buffer(0.2)) # the_geom should reflect encoded 'geometry' column _add_encoded_geom(df, None) # geometry column should equal the_geom after function call self.assertTrue(df['the_geom'].equals( df['geometry'].apply(_encode_geom))) df = cc.read(self.test_read_table, limit=5) # raise error if 'geometry' column does not exist with self.assertRaises(KeyError): _add_encoded_geom(df, None) def test_decode_geom(self): """context._decode_geom""" from cartoframes.context import _decode_geom # Point (0, 0) without SRID ewkb = '010100000000000000000000000000000000000000' decoded_geom = _decode_geom(ewkb) self.assertEqual(decoded_geom.wkt, 'POINT (0 0)') self.assertIsNone(_decode_geom(None)) def test_encode_geom(self): """context._encode_geom""" from cartoframes.context import _encode_geom from shapely import wkb import binascii as ba # Point (0 0) without SRID ewkb = '010100000000000000000000000000000000000000' geom = wkb.loads(ba.unhexlify(ewkb)) ewkb_resp = _encode_geom(geom) self.assertEqual(ewkb_resp, ewkb) self.assertIsNone(_encode_geom(None)) def test_dtypes2pg(self): """context._dtypes2pg""" from cartoframes.context import _dtypes2pg results = { 'float64': 
'numeric', 'int64': 'numeric', 'float32': 'numeric', 'int32': 'numeric', 'object': 'text', 'bool': 'boolean', 'datetime64[ns]': 'timestamp', 'unknown_dtype': 'text' } for i in results: self.assertEqual(_dtypes2pg(i), results[i]) def test_pg2dtypes(self): """context._pg2dtypes""" from cartoframes.context import _pg2dtypes results = { 'date': 'datetime64[ns]', 'number': 'float64', 'string': 'object', 'boolean': 'bool', 'geometry': 'object', 'unknown_pgdata': 'object' } for i in results: result = _pg2dtypes(i) self.assertEqual(result, results[i]) def test_debug_print(self): """context._debug_print""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey, verbose=True) # request-response usage resp = requests.get('http://httpbin.org/get') cc._debug_print(resp=resp) cc._debug_print(resp=resp.text) # non-requests-response usage test_str = 'this is a test' long_test_str = ', '.join([test_str] * 100) self.assertIsNone(cc._debug_print(test_str=test_str)) self.assertIsNone(cc._debug_print(long_str=long_test_str)) # verbose = False test cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey, verbose=False) self.assertIsNone(cc._debug_print(resp=test_str)) def test_data_boundaries(self): """context.CartoContext.data_boundaries""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) # all boundary metadata boundary_meta = cc.data_boundaries() self.assertTrue(boundary_meta.shape[0] > 0, msg='has non-zero number of boundaries') meta_cols = set(( 'geom_id', 'geom_tags', 'geom_type', )) self.assertTrue(meta_cols & set(boundary_meta.columns)) # boundary metadata in a region regions = ( self.test_read_table, self.test_data_table, [5.9559111595, 45.8179931641, 10.4920501709, 47.808380127], 'Australia', ) for region in regions: boundary_meta = cc.data_boundaries(region=region) self.assertTrue(meta_cols & set(boundary_meta.columns)) self.assertTrue(boundary_meta.shape[0] > 0, msg='has non-zero number of boundaries') # boundaries 
for world boundaries = cc.data_boundaries(boundary='us.census.tiger.state') self.assertTrue(boundaries.shape[0] > 0) self.assertEqual(boundaries.shape[1], 2) self.assertSetEqual(set(( 'the_geom', 'geom_refs', )), set(boundaries.columns)) # boundaries for region boundaries = ('us.census.tiger.state', ) for b in boundaries: geoms = cc.data_boundaries(boundary=b, region=self.test_data_table) self.assertTrue(geoms.shape[0] > 0) self.assertEqual(geoms.shape[1], 2) self.assertSetEqual(set(( 'the_geom', 'geom_refs', )), set(geoms.columns)) # presence or lack of clipped boundaries nonclipped = ( True, False, ) for tf in nonclipped: meta = cc.data_boundaries(include_nonclipped=tf) self.assertEqual('us.census.tiger.state' in set(meta.geom_id), tf) with self.assertRaises(ValueError): cc.data_boundaries(region=[1, 2, 3]) with self.assertRaises(ValueError): cc.data_boundaries(region=10) def test_data_discovery(self): """context.CartoContext.data_discovery""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) meta = cc.data_discovery(self.test_read_table, keywords=('poverty', ), time=('2010 - 2014', )) meta_columns = set( ('denom_aggregate', 'denom_colname', 'denom_description', 'denom_geomref_colname', 'denom_id', 'denom_name', 'denom_reltype', 'denom_t_description', 'denom_tablename', 'denom_type', 'geom_colname', 'geom_description', 'geom_geomref_colname', 'geom_id', 'geom_name', 'geom_t_description', 'geom_tablename', 'geom_timespan', 'geom_type', 'id', 'max_score_rank', 'max_timespan_rank', 'normalization', 'num_geoms', 'numer_aggregate', 'numer_colname', 'numer_description', 'numer_geomref_colname', 'numer_id', 'numer_name', 'numer_t_description', 'numer_tablename', 'numer_timespan', 'numer_type', 'score', 'score_rank', 'score_rownum', 'suggested_name', 'target_area', 'target_geoms', 'timespan_rank', 'timespan_rownum')) self.assertSetEqual(set(meta.columns), meta_columns, msg='metadata columns are all there') self.assertTrue((meta['numer_timespan'] 
== '2010 - 2014').all()) self.assertTrue( (meta['numer_description'].str.contains('poverty')).all()) # test region = list of lng/lats with self.assertRaises(ValueError): cc.data_discovery([1, 2, 3]) switzerland = [ 5.9559111595, 45.8179931641, 10.4920501709, 47.808380127 ] dd = cc.data_discovery(switzerland, keywords='freight', time='2010') self.assertEqual(dd['numer_id'][0], 'eu.eurostat.tgs00078') dd = cc.data_discovery('Australia', regex='.*Torres Strait Islander.*') for nid in dd['numer_id'].values: self.assertRegexpMatches( nid, '^au\.data\.B01_Indig_[A-Za-z_]+Torres_St[A-Za-z_]+[FMP]$') with self.assertRaises(CartoException): cc.data_discovery('non_existent_table_abcdefg') dd = cc.data_discovery('United States', boundaries='us.epa.huc.hydro_unit', time=( '2006', '2010', )) self.assertTrue(dd.shape[0] >= 1) poverty = cc.data_discovery('United States', boundaries='us.census.tiger.census_tract', keywords=[ 'poverty status', ], time='2011 - 2015', include_quantiles=False) df_quantiles = poverty[poverty.numer_aggregate == 'quantile'] self.assertEqual(df_quantiles.shape[0], 0) poverty = cc.data_discovery('United States', boundaries='us.census.tiger.census_tract', keywords=[ 'poverty status', ], time='2011 - 2015', include_quantiles=True) df_quantiles = poverty[poverty.numer_aggregate == 'quantile'] self.assertTrue(df_quantiles.shape[0] > 0) def test_data(self): """context.CartoContext.data""" cc = cartoframes.CartoContext(base_url=self.baseurl, api_key=self.apikey) meta = cc.data_discovery(self.test_read_table, keywords=('poverty', ), time=('2010 - 2014', )) data = cc.data(self.test_data_table, meta) anscols = set(meta['suggested_name']) origcols = set(cc.read(self.test_data_table, limit=1).columns) self.assertSetEqual(anscols, set(data.columns) - origcols) meta = [ { 'numer_id': 'us.census.acs.B19013001', 'geom_id': 'us.census.tiger.block_group', 'numer_timespan': '2011 - 2015' }, ] data = cc.data(self.test_data_table, meta) 
self.assertSetEqual(set(('median_income_2011_2015', )), set(data.columns) - origcols) # with self.assertRaises(NotImplementedError): # cc.data(self.test_data_table, meta, how='geom_ref') with self.assertRaises(ValueError, msg='no measures'): meta = cc.data_discovery('United States', keywords='not a measure') cc.data(self.test_read_table, meta) with self.assertRaises(ValueError, msg='too many metadata measures'): # returns ~180 measures meta = cc.data_discovery(region='united states', keywords='education') cc.data(self.test_read_table, meta) with self.assertRaises(NameError, msg='column name already exists'): meta = cc.data_discovery(region='united states', time='2006 - 2010', regex='.*walked to work.*', boundaries='us.census.tiger.census_tract') cc.data(self.test_data_table, meta)
args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) else: logger.error('You need to provide valid credentials, run with -h parameter for details') import sys sys.exit(1) # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) # Dataset manager dataset_manager = DatasetManager(auth_client) # Get all datasets from account datasets = dataset_manager.all() # loop over all datasets from account for tablename in datasets: query = 'SELECT * FROM {table_name}'.format(table_name=tablename.name) try: result = sql.send(query, format=args.EXPORT_FORMAT) except Exception as e: logger.error(str(e)) break data_folder = Path(args.SAVE_FOLDER) / "{table_name}.{format}".format(table_name=tablename.name,format=args.EXPORT_FORMAT) # write file to files folder try: data_folder.write_bytes(result) except Exception as e: logger.error(str(e)) break
args = parser.parse_args() # Set authentification to CARTO if args.CARTO_BASE_URL and args.CARTO_API_KEY: auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY) else: logger.error('You need to provide valid credentials, run with ' '-h parameter for details') sys.exit(1) # Create and cartodbfy a table sqlClient = SQLClient(auth_client) sqlClient.send(""" CREATE TABLE IF NOT EXISTS copy_example ( the_geom geometry(Geometry,4326), name text, age integer ) """) sqlClient.send("SELECT CDB_CartodbfyTable(current_schema, 'copy_example')") copyClient = CopySQLClient(auth_client) # COPY FROM example logger.info("COPY'ing FROM file...") query = ('COPY copy_example (the_geom, name, age) ' 'FROM stdin WITH (FORMAT csv, HEADER true)') result = copyClient.copyfrom_file_path(query, 'files/copy_from.csv') logger.info('result = %s' % result) # COPY TO example
sys.exit(1) # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) # get username from base_url substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL) if substring: username = substring.group(1) # check all table name of account all_tables = [] tables = sql.send( "select pg_class.relname from pg_class, pg_roles, pg_namespace" + " where pg_roles.oid = pg_class.relowner and " + "pg_roles.rolname = current_user " + "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'") q = "select \ 'CREATE TABLE ' || relname || E'\n(\n' || \ array_to_string( \ array_agg( \ ' ' || column_name || ' ' || type || ' '|| not_null \ ) \ , E',\n' \ ) || E'\n);\n' as create_table \ from \ ( \ select \ distinct on (column_name) c.relname, a.attname AS column_name, \
def test_no_auth_sql_error_get(no_auth_client):
    """Querying a non-existing dataset over GET must raise CartoException.

    Fix: ``SQLClient.send``'s second positional parameter is ``parse_json``,
    not a dict of request options, so the original call
    ``sql.send(query, {'do_post': False})`` set ``parse_json`` to a truthy
    dict and left ``do_post`` at its default (True) — the request was still
    a POST. Pass ``do_post=False`` by keyword so the request really goes
    out as a GET, which is what this test is meant to exercise.
    """
    sql = SQLClient(no_auth_client)
    with pytest.raises(CartoException):
        sql.send('select * from non_existing_dataset', do_post=False)
def delete_from_carto_by_id(asset_id):
    """Delete the row whose ``id`` equals ``asset_id`` from the Carto table.

    Args:
        asset_id: Value of the table's ``id`` column identifying the row
            to delete.

    Returns:
        dict: The Carto SQL API response (includes timing and row counts).
    """
    auth_client = APIKeyAuthClient(api_key=CARTO_API_KEY, base_url=USR_BASE_URL)
    sql = SQLClient(auth_client)
    # SQLClient offers no bound parameters, so the value is interpolated
    # into the statement. Double any embedded single quotes so a value
    # containing ' cannot break out of the string literal (SQL injection
    # or a malformed query).
    safe_id = str(asset_id).replace("'", "''")
    results = sql.send(f"DELETE from {TABLE_NAME} WHERE id ='{safe_id}'")
    return results
class CartoContext(object): """Manages connections with CARTO for data and map operations. Modeled after `SparkContext <https://jaceklaskowski.gitbooks.io/mastering-apache-spark/content/spark-sparkcontext.html>`__. Example: Create a CartoContext object .. code:: python import cartoframes cc = cartoframes.CartoContext(BASEURL, APIKEY) Args: base_url (str): Base URL of CARTO user account. Cloud-based accounts are of the form ``https://{username}.carto.com`` (e.g., https://eschbacher.carto.com for user ``eschbacher``). On-premises installation users should ask their admin. api_key (str): CARTO API key. session (requests.Session, optional): requests session. See `requests documentation <http://docs.python-requests.org/en/master/user/advanced/>`__ for more information: verbose (bool, optional): Output underlying process states (True), or suppress (False, default) Returns: :obj:`CartoContext`: A CartoContext object that is authenticated against the user's CARTO account. """ def __init__(self, base_url=None, api_key=None, session=None, verbose=0): self.api_key, self.base_url = _process_credentials(api_key, base_url) self.auth_client = APIKeyAuthClient(base_url=self.base_url, api_key=self.api_key, session=session) self.sql_client = SQLClient(self.auth_client) self.username = self.auth_client.username self.is_org = self._is_org_user() self._map_templates = {} self._srcdoc = None self._verbose = verbose def _is_org_user(self): """Report whether user is in a multiuser CARTO organization or not""" res = self.sql_client.send('SHOW search_path') paths = [p.strip() for p in res['rows'][0]['search_path'].split(',')] # is an org user if first item is not `public` return paths[0] != 'public' def read(self, table_name, limit=None, index='cartodb_id', decode_geom=False): """Read tables from CARTO into pandas DataFrames. Example: .. 
code:: python import cartoframes cc = cartoframes.CartoContext(BASEURL, APIKEY) df = cc.read('acadia_biodiversity') Args: table_name (str): Name of table in user's CARTO account. limit (int, optional): Read only ``limit`` lines from ``table_name``. Defaults to `None`, which reads the full table. index (str, optional): Not currently in use. Returns: pandas.DataFrame: DataFrame representation of `table_name` from CARTO. """ query = 'SELECT * FROM "{table_name}"'.format(table_name=table_name) if limit: if (limit >= 0) and isinstance(limit, int): query += ' LIMIT {limit}'.format(limit=limit) else: raise ValueError("`limit` parameter must an integer >= 0") return self.query(query, decode_geom=decode_geom) def write(self, df, table_name, temp_dir='/tmp', overwrite=False, lnglat=None, encode_geom=False, geom_col=None): """Write a DataFrame to a CARTO table. Example: .. code:: python cc.write(df, 'brooklyn_poverty', overwrite=True) Args: df (pandas.DataFrame): DataFrame to write to ``table_name`` in user CARTO account table_name (str): Table to write ``df`` to in CARTO. temp_dir (str, optional): Directory for temporary storage of data that is sent to CARTO. Defaults to ``/tmp`` (Unix-like systems). overwrite (bool, optional): Behavior for overwriting ``table_name`` if it exits on CARTO. Defaults to ``False``. lnglat (tuple, optional): lng/lat pair that can be used for creating a geometry on CARTO. Defaults to ``None``. In some cases, geometry will be created without specifying this. See CARTO's `Import API <https://carto.com/docs/carto-engine/import-api/standard-tables>`__ for more information. encode_geom (bool, optional): Whether to write `geom_col` to CARTO as `the_geom`. geom_col (str, optional): The name of the column where geometry information is stored. Used in conjunction with `encode_geom`. 
Returns: None """ if encode_geom: _add_encoded_geom(df, geom_col) if not overwrite: # error if table exists and user does not want to overwrite self._table_exists(table_name) if df.shape[0] > MAX_IMPORT_ROWS: final_table_name = self._send_batches(df, table_name, temp_dir, geom_col) else: final_table_name = self._send_dataframe(df, table_name, temp_dir, geom_col) self._set_schema(df, final_table_name) # create geometry column from lat/longs if requested if lnglat: # TODO: make this a batch job if it is a large dataframe or move # inside of _send_dataframe and/or batch tqdm.write('Creating geometry out of columns ' '`{lng}`/`{lat}`'.format(lng=lnglat[0], lat=lnglat[1])) self.sql_client.send(''' UPDATE "{table_name}" SET the_geom = CDB_LatLng("{lat}"::numeric, "{lng}"::numeric) '''.format(table_name=final_table_name, lng=lnglat[0], lat=lnglat[1])) self._column_normalization(df, final_table_name, geom_col) tqdm.write('Table written to CARTO: ' '{base_url}dataset/{table_name}'.format( base_url=self.base_url, table_name=final_table_name)) def _table_exists(self, table_name): """Checks to see if table exists""" try: self.sql_client.send(''' EXPLAIN SELECT * FROM "{table_name}" '''.format(table_name=table_name)) raise NameError( 'Table `{table_name}` already exists. 
' 'Run with `overwrite=True` if you wish to replace the ' 'table.'.format(table_name=table_name)) except CartoException as err: # If table doesn't exist, we get an error from the SQL API self._debug_print(err=err) return False return False def _send_batches(self, df, table_name, temp_dir, geom_col): """Batch sending a dataframe Args: df (pandas.DataFrame): DataFrame that will be batched up for sending to CARTO table_name (str): Name of table to send DataFrame to temp_dir (str): Local directory for temporary storage of DataFrame written to file that will be sent to CARTO geom_col (str): Name of encoded geometry column (if any) that will be dropped or converted to `the_geom` column Returns: final_table_name (str): Final table name on CARTO that the DataFrame is stored in Exceptions: * TODO: add more (Out of storage) """ subtables = [] # send dataframe chunks to carto for chunk_num, chunk in tqdm(df.groupby([i // MAX_IMPORT_ROWS for i in range(df.shape[0])]), desc='Uploading in batches: '): temp_table = '{orig}_cartoframes_temp_{chunk}'.format( orig=table_name[:40], chunk=chunk_num) try: # send dataframe chunk, get new name if collision temp_table = self._send_dataframe(chunk, temp_table, temp_dir, geom_col) except CartoException as err: self._drop_tables(subtables) raise CartoException(err) if temp_table: subtables.append(temp_table) self._debug_print(chunk_num=chunk_num, chunk_shape=str(chunk.shape), temp_table=temp_table) # combine chunks into final table try: select_base = ('SELECT %(schema)s ' 'FROM "{table}"') % dict(schema=_df2pg_schema(df)) unioned_tables = '\nUNION ALL\n'.join([select_base.format(table=t) for t in subtables]) self._debug_print(unioned=unioned_tables) query = ''' DROP TABLE IF EXISTS "{table_name}"; CREATE TABLE "{table_name}" As {unioned_tables}; ALTER TABLE {table_name} DROP COLUMN IF EXISTS cartodb_id; {drop_tables} SELECT CDB_CartoDBFYTable('{org}', '{table_name}'); '''.format(table_name=table_name, unioned_tables=unioned_tables, 
org=self.username if self.is_org else 'public', drop_tables=_drop_tables_query(subtables)) self._debug_print(query=query) _ = self.sql_client.send(query) except CartoException as err: try: self._drop_tables(subtables) except CartoException as err: warn('Failed to drop the following subtables from CARTO ' 'account: {}'.format(', '.join(subtables))) finally: raise Exception('Failed to upload dataframe: {}'.format(err)) return table_name def _drop_tables(self, tables): """Drop all tables in tables list Args: tables (list of str): list of table names Returns: None """ query = _drop_tables_query(tables) _ = self.sql_client.send(query) return None def _send_dataframe(self, df, table_name, temp_dir, geom_col): """Send a DataFrame to CARTO to be imported as a SQL table Args: df (pandas.DataFrame): DataFrame that is will be sent to CARTO table_name (str): Name on CARTO for the table that will have the data from ``df`` temp_dir (str): Name of directory used for temporarily storing the DataFrame file to sent to CARTO geom_col (str): Name of geometry column Returns: final_table_name (str): Name of final table. This method will overwrite the table `table_name` if it already exists. 
""" def remove_tempfile(filepath): """removes temporary file""" os.remove(filepath) tempfile = '{temp_dir}/{table_name}.csv'.format(temp_dir=temp_dir, table_name=table_name) self._debug_print(tempfile=tempfile) df.drop(geom_col, axis=1, errors='ignore').to_csv(tempfile) with open(tempfile, 'rb') as f: res = self._auth_send('api/v1/imports', 'POST', files={'file': f}, params={'type_guessing': 'false'}, stream=True) self._debug_print(res=res) if not res['success']: remove_tempfile(tempfile) raise CartoException('Failed to send DataFrame') import_id = res['item_queue_id'] remove_tempfile(tempfile) final_table_name = table_name while True: import_job = self._check_import(import_id) self._debug_print(import_job=import_job) final_table_name = self._handle_import(import_job, table_name) if import_job['state'] == 'complete': break # Wait a second before doing another request time.sleep(1.0) return final_table_name def _set_schema(self, dataframe, table_name): """Update a table associated with a dataframe to have the equivalent schema""" utility_cols = ('the_geom', 'the_geom_webmercator', 'cartodb_id') alter_temp = ('ALTER COLUMN "{col}" TYPE {ctype} USING ' 'NULLIF("{col}", \'\')::{ctype}') alter_cols = ', '.join(alter_temp.format(col=c, ctype=_dtypes2pg(t)) for c, t in zip(dataframe.columns, dataframe.dtypes) if c not in utility_cols) alter_query = 'ALTER TABLE "{table}" {alter_cols};'.format( table=table_name, alter_cols=alter_cols) self._debug_print(alter_query=alter_query) try: _ = self.sql_client.send(alter_query) except CartoException as err: warn('DataFrame written to CARTO but table schema failed to ' 'update to match DataFrame. All columns have data type ' '`text`. CARTO error: `{err}`. 
Query: {query}'.format( err=err, query=alter_query)) def _check_import(self, import_id): """Check the status of an Import API job""" res = self._auth_send('api/v1/imports/{}'.format(import_id), 'GET') return res def _handle_import(self, import_job, table_name): """Handle state of import job""" if import_job['state'] == 'failure': if import_job['error_code'] == 8001: raise CartoException('Over CARTO account storage limit for ' 'user `{}`. Try subsetting your ' 'DataFrame or dropping columns to reduce ' 'the data size.'.format(self.username)) elif import_job['error_code'] == 6668: raise CartoException('Too many rows in DataFrame. Try ' 'subsetting DataFrame before writing to ' 'CARTO.') else: raise CartoException('Error code: `{}`. See CARTO Import ' 'API error documentation for more ' 'information: https://carto.com/docs/' 'carto-engine/import-api/import-errors' ''.format(import_job['error_code'])) elif import_job['state'] == 'complete': self._debug_print(final_table=import_job['table_name']) if import_job['table_name'] != table_name: try: res = self.sql_client.send(''' DROP TABLE IF EXISTS {orig_table}; ALTER TABLE {dupe_table} RENAME TO {orig_table}; '''.format( orig_table=table_name, dupe_table=import_job['table_name'])) self._debug_print(res=res) except Exception as err: self._debug_print(err=err) raise Exception('Cannot overwrite table `{table_name}` ' '({err}). 
DataFrame was written to ' '`{new_table}` instead.'.format( table_name=table_name, err=err, new_table=import_job['table_name'])) return table_name def _column_normalization(self, dataframe, table_name, geom_col): """Print a warning if there is a difference between the normalized PostgreSQL column names and the ones in the DataFrame""" pgcolumns = self.sql_client.send(''' SELECT * FROM "{table_name}" LIMIT 0'''.format(table_name=table_name))['fields'].keys() diff_cols = (set(dataframe.columns) ^ set(pgcolumns)) - {'cartodb_id', geom_col} if diff_cols: cols = ', '.join('`{}`'.format(c) for c in diff_cols) tqdm.write('The following columns were renamed because of ' 'PostgreSQL column normalization requirements: ' '{cols}'.format(cols=cols)) def sync(self, dataframe, table_name): """Depending on the size of the DataFrame or CARTO table, perform granular operations on a DataFrame to only update the changed cells instead of a bulk upload. If on the large side, perform granular operations, if on the smaller side use Import API. Note: Not yet implemented. """ pass def query(self, query, table_name=None, decode_geom=False): """Pull the result from an arbitrary SQL query from a CARTO account into a pandas DataFrame. Can also be used to perform database operations (creating/dropping tables, adding columns, updates, etc.). Args: query (str): Query to run against CARTO user database. table_name (str, optional): If set, this will create a new table in the user's CARTO account that is the result of the query. Defaults to None (no table created). Returns: pandas.DataFrame: DataFrame representation of query supplied. Pandas data types are inferred from PostgreSQL data types. In the case of PostgreSQL date types, the data type 'object' is used. 
""" self._debug_print(query=query) if table_name: create_table_query = ''' CREATE TABLE {table_name} As SELECT * FROM ({query}) As _wrap; SELECT CDB_CartodbfyTable('{org}', '{table_name}'); '''.format(table_name=table_name, query=query, org=(self.username if self.is_org else 'public')) self._debug_print(create_table_query=create_table_query) create_table_res = self.sql_client.send(create_table_query) self._debug_print(create_table_res=create_table_res) new_table_name = create_table_res['rows'][0]['cdb_cartodbfytable'] self._debug_print(new_table_name=new_table_name) select_res = self.sql_client.send( 'SELECT * FROM {table_name}'.format(table_name=new_table_name)) else: select_res = self.sql_client.send(query) self._debug_print(select_res=select_res) # TODO: replace this with a function pg2dtypes = { 'date': 'object', 'number': 'float64', 'string': 'object', 'boolean': 'bool', 'geometry': 'object', } fields = select_res['fields'] schema = { field: pg2dtypes.get(fields[field]['type'], 'object') if field != 'cartodb_id' else 'int64' for field in fields } if not schema.keys(): return None self._debug_print(fields=fields, schema=schema) df = pd.DataFrame( data=select_res['rows'], columns=[k for k in fields]).astype(schema) if 'cartodb_id' in fields: df.set_index('cartodb_id', inplace=True) if decode_geom: df['geometry'] = df.the_geom.apply(_decode_geom) return df def map(self, layers=None, interactive=True, zoom=None, lat=None, lng=None, size=(800, 400), ax=None): """Produce a CARTO map visualizing data layers. Example: Create a map with two data layers, and one BaseMap layer. 
:: import cartoframes from cartoframes import Layer, BaseMap, styling cc = cartoframes.CartoContext(BASEURL, APIKEY) cc.map(layers=[BaseMap(), Layer('acadia_biodiversity', color={'column': 'simpson_index', 'scheme': styling.tealRose(7)}), Layer('peregrine_falcon_nest_sites', size='num_eggs', color={'column': 'bird_id', 'scheme': styling.vivid(10))], interactive=True) Args: layers (list, optional): List of one or more of the following: - Layer: cartoframes Layer object for visualizing data from a CARTO table. See `layer.Layer <#layer.Layer>`__ for all styling options. - BaseMap: Basemap for contextualizng data layers. See `layer.BaseMap <#layer.BaseMap>`__ for all styling options. - QueryLayer: Layer from an arbitrary query. See `layer.QueryLayer <#layer.QueryLayer>`__ for all styling options. interactive (bool, optional): Defaults to ``True`` to show an interactive slippy map. Setting to ``False`` creates a static map. zoom (int, optional): Zoom level of map. Acceptable values are usually in the range 0 to 19. 0 has the entire earth on a single tile (256px square). Zoom 19 is the size of a city block. Must be used in conjunction with ``lng`` and ``lat``. Defaults to a view to have all data layers in view. lat (float, optional): Latitude value for the center of the map. Must be used in conjunction with ``zoom`` and ``lng``. Defaults to a view to have all data layers in view. lng (float, optional): Longitude value for the center of the map. Must be used in conjunction with ``zoom`` and ``lat``. Defaults to a view to have all data layers in view. size (tuple, optional): List of pixel dimensions for the map. Format is ``(width, height)``. Defaults to ``(800, 400)``. Returns: IPython.display.HTML: Interactive maps are rendered in an ``iframe``, while static maps are rendered in ``img`` tags. 
""" # TODO: add layers preprocessing method like # layers = process_layers(layers) # that uses up to layer limit value error if not hasattr(IPython, 'display'): raise NotImplementedError('Nope, cannot display maps at the ' 'command line.') if layers is None: layers = [] elif not isinstance(layers, collections.Iterable): layers = [layers] else: layers = list(layers) if len(layers) > 8: raise ValueError('map can have at most 8 layers') if any([zoom, lat, lng]) != all([zoom, lat, lng]): raise ValueError('zoom, lat, and lng must all or none be provided') # When no layers are passed, set default zoom if ((len(layers) == 0 and zoom is None) or (len(layers) == 1 and layers[0].is_basemap)): [zoom, lat, lng] = [3, 38, -99] has_zoom = zoom is not None # Check basemaps, add one if none exist base_layers = [idx for idx, layer in enumerate(layers) if layer.is_basemap] if len(base_layers) > 1: raise ValueError('map can at most take 1 BaseMap layer') if len(base_layers) > 0: layers.insert(0, layers.pop(base_layers[0])) else: layers.insert(0, BaseMap()) # Check for a time layer, if it exists move it to the front time_layers = [idx for idx, layer in enumerate(layers) if not layer.is_basemap and layer.time] time_layer = layers[time_layers[0]] if len(time_layers) > 0 else None if len(time_layers) > 1: raise ValueError('Map can at most take 1 Layer with time ' 'column/field') if time_layer: raise NotImplementedError('Animated maps are not yet supported') if not interactive: raise ValueError('map cannot display a static image with a ' 'time_column') layers.append(layers.pop(time_layers[0])) # If basemap labels are on front, add labels layer basemap = layers[0] if basemap.is_basic() and basemap.labels == 'front': layers.append(BaseMap(basemap.source, labels=basemap.labels, only_labels=True)) # Setup layers for idx, layer in enumerate(layers): layer._setup(layers, idx) nb_layers = non_basemap_layers(layers) options = {'basemap_url': basemap.url} for idx, layer in enumerate(nb_layers): 
self._check_query(layer.query, style_cols=layer.style_cols) options['cartocss_' + str(idx)] = layer.cartocss options['sql_' + str(idx)] = layer.query params = { 'config': json.dumps(options), 'anti_cache': random.random(), } if has_zoom: params.update({'zoom': zoom, 'lat': lat, 'lon': lng}) options.update({'zoom': zoom, 'lat': lat, 'lng': lng}) else: options.update(self._get_bounds(nb_layers)) map_name = self._send_map_template(layers, has_zoom=has_zoom) api_url = '{base_url}api/v1/map'.format(base_url=self.base_url) static_url = ('{api_url}/static/named/{map_name}' '/{width}/{height}.png?{params}').format( api_url=api_url, map_name=map_name, width=size[0], height=size[1], params=urlencode(params)) html = '<img src="{url}" />'.format(url=static_url) # TODO: write this as a private method if interactive: netloc = urlparse(self.base_url).netloc domain = 'carto.com' if netloc.endswith('.carto.com') else netloc def safe_quotes(text, escape_single_quotes=False): """htmlify string""" if isinstance(text, str): safe_text = text.replace('"', """) if escape_single_quotes: safe_text = safe_text.replace("'", "\'") return safe_text.replace('True', 'true') return text config = { 'user_name': self.username, 'maps_api_template': self.base_url[:-1], 'sql_api_template': self.base_url[:-1], 'tiler_protocol': 'https', 'tiler_domain': domain, 'tiler_port': '80', 'type': 'torque' if time_layer else 'namedmap', 'named_map': { 'name': map_name, 'params': { k: safe_quotes(v, escape_single_quotes=True) for k, v in dict_items(options) }, }, } map_options = { 'filter': ['http', 'mapnik', 'torque'], 'https': True, } if time_layer: config.update({ 'order': 1, 'options': { 'query': time_layer.query, 'user_name': self.username, 'tile_style': time_layer.torque_cartocss, } }) config['named_map'].update({ 'layers': [{ 'layer_name': 't', }], }) map_options.update({ 'time_slider': True, 'loop': True, }) bounds = [] if has_zoom else [[options['north'], options['east']], [options['south'], 
options['west']]] content = self._get_iframe_srcdoc(config=config, bounds=bounds, options=options, map_options=map_options) img_html = html html = ( '<iframe srcdoc="{content}" width={width} height={height}>' ' Preview image: {img_html}' '</iframe>' ).format(content=safe_quotes(content), width=size[0], height=size[1], img_html=img_html) return IPython.display.HTML(html) else: try: import matplotlib.image as mpi import matplotlib.pyplot as plt except ImportError: warn('Matplotlib not detected. Saving image directly to disk') raise NotImplementedError raw_data = mpi.imread(static_url) f = plt.gcf() if ax is None: ax = plt.gca() ax.imshow(raw_data) ax.axis('off') return ax def data_boundaries(self, df=None, table_name=None): """Not currently implemented""" pass def data_discovery(self, keywords=None, regex=None, time=None, boundary=None): """Not currently implemented""" pass def data_augment(self, table_name, metadata): """Augment an existing CARTO table with `Data Observatory <https://carto.com/data-observatory>`__ measures. See the full `Data Observatory catalog <https://cartodb.github.io/bigmetadata/index.html>`__ for all available measures. The result of this operation is: 1. It updates `table_name` by adding columns from the Data Observatory 2. It returns a pandas DataFrame representation of that newly augmented table. Note: This method alters `table_name` in the user's CARTO database by adding additional columns. To avoid this, create a copy of the table first and use the new copy instead. Example: Add new measures to a CARTO table and pass it to a pandas DataFrame. Using the "Median Household Income in the past 12 months" measure from the `Data Observatory Catalog <https://cartodb.github.io/bigmetadata/united_states/income.html#median-household-income-in-the-past-12-months>`__. 
:: import cartoframes cc = cartoframes.CartoContext(BASEURL, APIKEY) median_income = [{'numer_id': 'us.census.acs.B19013001', 'geom_id': 'us.census.tiger.block_group', 'numer_timespan': '2011 - 2015'}] df = cc.data_augment('transaction_events', median_income) Args: table_name (str): Name of table on CARTO account that Data Observatory measures are to be added to. metadata (list of dicts): List of all measures to add to `table_name`. Each `dict` has the following keys: - `numer_id` (str): The identifier for the desired measurement - `geom_id` (str, optional): Identifier for a desired geographic boundary level to use when calculating measures. Will be automatically assigned if undefined - `normalization` (str, optional): The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. - `denom_id` (str, optional): Measure ID from DO catalog - `numer_timespan` (str, optional): The desired timespan for the measurement. Defaults to most recent timespan available if left unspecified. - `geom_timespan` (str, optional): The desired timespan for the geometry. Defaults to timespan matching `numer_timespan` if left unspecified. - `target_area` (str, optional): Instead of aiming to have `target_geoms` in the area of the geometry passed as extent, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata, for example if you're passing in points. 
- `target_geoms` (str, optional): Override global `target_geoms` for this element of metadata - `max_timespan_rank` (str, optional): Override global `max_timespan_rank` for this element of metadata - `max_score_rank` (str, optional): Override global `max_score_rank` for this element of metadata Returns: pandas.DataFrame: A DataFrame representation of `table_name` which has new columns for each measure in `metadata`. """ try: with open(os.path.join(os.path.dirname(__file__), 'assets/data_obs_augment.sql'), 'r') as f: augment_functions = f.read() self.sql_client.send(augment_functions) except Exception as err: raise CartoException("Could not install `obs_augment_table` onto " "user account ({})".format(err)) # augment with data observatory metadata augment_query = ''' select obs_augment_table('{username}.{tablename}', '{cols_meta}'); '''.format(username=self.username, tablename=table_name, cols_meta=json.dumps(metadata)) resp = self.sql_client.send(augment_query) # read full augmented table return self.read(table_name) def _auth_send(self, relative_path, http_method, **kwargs): self._debug_print(relative_path=relative_path, http_method=http_method, kwargs=kwargs) res = self.auth_client.send(relative_path, http_method, **kwargs) if isinstance(res.content, str): return json.loads(res.content) return json.loads(res.content.decode('utf-8')) def _check_query(self, query, style_cols=None): """Checks if query from Layer or QueryLayer is valid""" try: self.sql_client.send(''' EXPLAIN SELECT {style_cols}{comma} the_geom, the_geom_webmercator FROM ({query}) _wrap; '''.format(query=query, comma=',' if style_cols else '', style_cols=','.join(style_cols))) except Exception as err: raise ValueError(('Layer query `{query}` and/or style column(s) ' '{cols} are not valid: {err}.' 
'').format(query=query, cols=', '.join(['`{}`'.format(c) for c in style_cols]), err=err)) def _send_map_template(self, layers, has_zoom): map_name = get_map_name(layers, has_zoom=has_zoom) if map_name not in self._map_templates: try: self._auth_send('api/v1/map/named', 'POST', headers={'Content-Type': 'application/json'}, data=get_map_template(layers, has_zoom=has_zoom)) except ValueError('map already exists'): pass self._map_templates[map_name] = True return map_name def _get_iframe_srcdoc(self, config, bounds, options, map_options): if not hasattr(self, '_srcdoc') or self._srcdoc is None: with open(os.path.join(os.path.dirname(__file__), 'assets/cartoframes.html'), 'r') as f: self._srcdoc = f.read() return (self._srcdoc .replace('@@CONFIG@@', json.dumps(config)) .replace('@@BOUNDS@@', json.dumps(bounds)) .replace('@@OPTIONS@@', json.dumps(map_options)) .replace('@@ZOOM@@', str(options.get('zoom', 3))) .replace('@@LAT@@', str(options.get('lat', 0))) .replace('@@LNG@@', str(options.get('lng', 0)))) def _get_bounds(self, layers): """Return the bounds of all data layers involved in a cartoframes map. Args: layers (list): List of cartoframes layers. See `cartoframes.layers` for all types. Returns: dict: Dictionary of northern, southern, eastern, and western bounds of the superset of data layers. Keys are `north`, `south`, `east`, and `west`. Units are in WGS84. 
""" extent_query = ('SELECT ST_EXTENT(the_geom) AS the_geom ' 'FROM ({query}) as t{idx}\n') union_query = 'UNION ALL\n'.join( [extent_query.format(query=layer.query, idx=idx) for idx, layer in enumerate(layers) if not layer.is_basemap]) extent = self.sql_client.send(''' SELECT ST_XMIN(ext) AS west, ST_YMIN(ext) AS south, ST_XMAX(ext) AS east, ST_YMAX(ext) AS north FROM ( SELECT st_extent(the_geom) AS ext FROM ({union_query}) AS _wrap1 ) AS _wrap2 '''.format(union_query=union_query)) return extent['rows'][0] def _debug_print(self, **kwargs): if self._verbose <= 0: return for key, value in dict_items(kwargs): if isinstance(value, requests.Response): str_value = "status_code: {status_code}, content: {content}".format( status_code=value.status_code, content=value.content) else: str_value = str(value) if self._verbose < 2 and len(str_value) > 300: str_value = '{}\n\n...\n\n{}'.format(str_value[:250], str_value[-50:]) print('{key}: {value}'.format(key=key, value=str_value))
SRC_API_KEY ) auth_dst_client = APIKeyAuthClient( DST_URL, DST_API_KEY ) # Get SQL API clients sql_src_client = SQLClient(auth_src_client) sql_dst_client = SQLClient(auth_dst_client) # Create a SQL utility function to extract source table structure. with open('generate_create_table_statement.sql', 'r') as f: query_generate_create_table_statement = f.read() logger.info('Creating function generate_create_table_statement...') res = sql_src_client.send(query_generate_create_table_statement) logger.info('Response: {}'.format(res)) # Get the table structure logger.info('Getting the CREATE TABLE statement for %s' % TABLE_NAME) query = ( "SELECT generate_create_table_statement('%s')" " AS create_table" % TABLE_NAME ) res = sql_src_client.send(query) create_table = res['rows'][0]['create_table'] # This is a bit of a trick: we omit the sequences to avoid # dependencies on other objects Normally this just affects the # cartodb_id and can optionally be fixed by cartodbfy'ing create_table_no_seqs = re.sub(r'DEFAULT nextval\([^\)]+\)', ' ', create_table)
table = dataset_manager.create(i) logger.info( 'Table imported: {table}'.format(table=table.name)) table_name.append(table.name) # define base table to insert all rows from other files base_table = table_name[0] # select all rows from table except cartodb_id to avoid possible errors columns_table = "select string_agg(column_name,',')" + \ " FROM information_schema.columns" + \ " where table_schema = '" + username + "' and table_name = '" + \ str(table_name[0]) + "' AND column_name <> 'cartodb_id'" result_query = sql.send(columns_table) for k, v in result_query.items(): if k == 'rows': for itr in v: dict_col = itr logging.debug(dict_col['string_agg']) # apply operation INSERT INTO SELECT with columns from previous query index = 1 for i in table_name: if i == base_table: continue
for i in file_folder: table = dataset_manager.create(i) logger.info('Table imported: {table}'.format(table=table.name)) table_name.append(table.name) # define base table to insert all rows from other files base_table = table_name[0] # select all rows from table except cartodb_id to avoid possible errors columns_table = "select string_agg(column_name,',')" + \ " FROM information_schema.columns" + \ " where table_schema = '" + username + "' and table_name = '" + \ str(table_name[0]) + "' AND column_name <> 'cartodb_id'" result_query = sql.send(columns_table) for k, v in result_query.items(): if k == 'rows': for itr in v: dict_col = itr logging.debug(dict_col['string_agg']) # apply operation INSERT INTO SELECT with columns from previous query index = 1 for i in table_name: if i == base_table: continue
help=('Api key of the account' ' (defaults to env variable CARTO_API_KEY)')) args = parser.parse_args() if not args.CARTO_BASE_URL or not args.CARTO_API_KEY: sys.exit(parser.print_usage()) auth_client = APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY) sql_client = SQLClient(auth_client) copy_client = CopySQLClient(auth_client) # Create a table suitable to receive the data logger.info('Creating table nexrad_copy_example...') sql_client.send("""CREATE TABLE IF NOT EXISTS nexrad_copy_example ( the_geom geometry(Geometry,4326), reflectivity numeric )""") sql_client.send( "SELECT CDB_CartodbfyTable(current_schema, 'nexrad_copy_example')") logger.info('Done') logger.info('Trying to connect to the THREDDS radar query service') rs = RadarServer( 'http://thredds.ucar.edu/thredds/radarServer/nexrad/level2/IDD/') logger.info('Quering data from the station') query = rs.query() query.stations('KLVX').time(datetime.utcnow()) assert rs.validate_query(query) catalog = rs.get_catalog(query)
# Set authentification to CARTO if args.CARTO_BASE_URL and args.CARTO_API_KEY: auth_client = APIKeyAuthClient( args.CARTO_BASE_URL, args.CARTO_API_KEY) else: logger.error('You need to provide valid credentials, run with ' '-h parameter for details') sys.exit(1) # Create and cartodbfy a table sqlClient = SQLClient(auth_client) sqlClient.send(""" CREATE TABLE IF NOT EXISTS copy_example ( the_geom geometry(Geometry,4326), name text, age integer ) """) sqlClient.send("SELECT CDB_CartodbfyTable(current_schema, 'copy_example')") copyClient = CopySQLClient(auth_client) # COPY FROM example logger.info("COPY'ing FROM file...") query = ('COPY copy_example (the_geom, name, age) ' 'FROM stdin WITH (FORMAT csv, HEADER true)') result = copyClient.copyfrom_file_path(query, 'files/copy_from.csv') logger.info('result = %s' % result) # COPY TO example with pandas DataFrame
args = parser.parse_args() # Authenticate to CARTO account if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization: auth_client = APIKeyAuthClient( args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization) dataset_manager = DatasetManager(auth_client) else: logger.error('You need to provide valid credentials, run with -h parameter for details') import sys sys.exit(1) # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) query = sql.send('EXPLAIN ANALYZE ' + args.queryUser) for key, value in query.items(): if key == 'rows': for itr in value: logger.info(itr) if key == 'time': logger.info(str(key) + ': ' + str(value)) query_arr = args.queryUser.upper().split() for i in query_arr: if i == '*': logger.warn('Do you need all columns? ' + 'You can improve the performance ' +
table_user.align['Attribute'] = 'l' table_user.align['Value'] = 'l' for row in userInfo: table_user.add_row(row) print(table_user) # print('{name}: {value}').format(name=i,value=str(user.__dict__[i])) except Exception as e: logger.warn('User has no admin of its organization') # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) # show quota of user results = [] print('\nThe quotas of the user are:\n') quota = sql.send( "SELECT * FROM cdb_dataservices_client.cdb_service_quota_info()") for k, v in quota.items(): if k == 'rows': for itr in v: results.append([ itr['service'], itr['used_quota'], itr['provider'], itr['soft_limit'], itr['monthly_quota'] ]) table = PrettyTable( ['Service', 'Provider', 'Soft limit', 'Used quota', 'Monthly quota']) table.align['Used quota'] = 'l' table.align['Provider'] = 'r'
# NOTE(review): fragment of a CARTO example that lists the current user's
# running queries. This excerpt starts mid-call: the opening
# parser.add_argument(...) for the base-URL option sits above it.
                    '(defaults to env variable CARTO_API_URL)')
parser.add_argument('--api_key', dest='CARTO_API_KEY',
                    default=os.environ['CARTO_API_KEY'] if 'CARTO_API_KEY' in os.environ else '',
                    help='Api key of the account' +
                    ' (defaults to env variable CARTO_API_KEY)')

args = parser.parse_args()

# Authenticate to CARTO account
if args.CARTO_BASE_URL and args.CARTO_API_KEY and args.organization:
    auth_client = APIKeyAuthClient(
        args.CARTO_BASE_URL, args.CARTO_API_KEY, args.organization)
    dataset_manager = DatasetManager(auth_client)
else:
    logger.error('You need to provide valid credentials, run with -h parameter for details')
    import sys
    sys.exit(1)

# SQL wrapper
sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY))

# List this user's active backend sessions straight from pg_stat_activity.
queries = "select pid, query from pg_stat_activity \
WHERE usename = current_user"

result = sql.send(queries)
# The SQL API response is a dict; log each entry under its 'rows' key.
for key, value in result.items():
    if key == 'rows':
        for itr in value:
            logger.info(itr)
sys.exit(1) # SQL wrapper sql = SQLClient(APIKeyAuthClient(args.CARTO_BASE_URL, args.CARTO_API_KEY)) # get username from base_url substring = re.search('https://(.+?).carto.com', args.CARTO_BASE_URL) if substring: username = substring.group(1) # check all table name of account all_tables = [] tables = sql.send( "select pg_class.relname from pg_class, pg_roles, pg_namespace" + " where pg_roles.oid = pg_class.relowner and " + "pg_roles.rolname = current_user " + "and pg_namespace.oid = pg_class.relnamespace and pg_class.relkind = 'r'") for k, v in tables.items(): if k == 'rows': for itr in v: all_tables.append(itr['relname']) # define array to store all the table sizes arr_size = [] # create array with values of the table sizes for i in all_tables: