def test_write_and_read_equivalency(setup_env):
    '''CSV Test 4: Ensure data and attr consistency after write and re-read'''
    source = metacsv.read_csv(os.path.join(testdata_prefix, 'test1.csv'))
    source.attrs['other stuff'] = 'this should show up after write'
    source['new_col'] = np.random.random((len(source), 1))
    tmpfile = os.path.join(test_tmp_prefix, 'test_write_1.csv')

    # Round-trip via a file path
    source.to_csv(tmpfile)
    reread = metacsv.read_csv(tmpfile)

    # Exercise repr on both objects (smoke test only)
    repr(source)
    repr(reread)

    assert (abs(source.values - reread.values) < 1e-7).all().all()
    assert source.coords == reread.coords
    assert source.variables == reread.variables

    # Round-trip again, this time through open file objects
    with open(tmpfile, 'w+') as tmp:
        source.to_csv(tmp)
    with open(tmpfile, 'r') as tmp:
        reread = metacsv.read_csv(tmp)

    assert (abs(source.values - reread.values) < 1e-7).all().all()
    assert source.coords == reread.coords
    assert source.variables == reread.variables
def test_assertions(self):
    """read_csv's assertions= argument validates attrs and variables on load."""
    fp = os.path.join(self.testdata_prefix, 'test7.csv')

    # Assertion against a literal attribute value
    metacsv.read_csv(fp, parse_vars=True,
                     assertions={'attrs': {'version': 'test5.2016-05-01.01'}})

    # Assertion via a callable predicate
    metacsv.read_csv(fp, parse_vars=True,
                     assertions={'attrs': {'version': lambda v: v > 'test5.2016-05-01.00'}})

    # Assertion on a variable's parsed metadata
    metacsv.read_csv(fp, parse_vars=True,
                     assertions={'variables': {'col2': {'unit': 'digits'}}})
def test_write_and_read_equivalency(self):
    '''CSV Test 4: Ensure data and attr consistency after write and re-read'''
    source = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test1.csv'))
    source.attrs['other stuff'] = 'this should show up after write'
    source['new_col'] = np.random.random((len(source), 1))
    tmpfile = os.path.join(self.test_tmp_prefix, 'test_write_1.csv')

    # Round-trip via a file path
    source.to_csv(tmpfile)
    reread = metacsv.read_csv(tmpfile)

    # Exercise repr on both objects (smoke test only)
    repr(source)
    repr(reread)

    self.assertTrue((abs(source.values - reread.values) < 1e-7).all().all())
    self.assertEqual(source.coords, reread.coords)
    self.assertEqual(source.variables, reread.variables)

    # Round-trip again, this time through open file objects
    with open(tmpfile, 'w+') as tmp:
        source.to_csv(tmp)
    with open(tmpfile, 'r') as tmp:
        reread = metacsv.read_csv(tmp)

    self.assertTrue((abs(source.values - reread.values) < 1e-7).all().all())
    self.assertEqual(source.coords, reread.coords)
    self.assertEqual(source.variables, reread.variables)
def __init__(self, iam, ssp):
    """iam and ssp should be as described in the files (e.g., iam = 'low', ssp = 'SSP3')"""
    # NOTE(review): baseline_year, baseline_filepath, growth_filepath and
    # nightlights_filepath are module-level names not visible in this chunk —
    # confirm they are defined at module scope.
    super(GDPpcProvider, self).__init__(iam, ssp, baseline_year)

    # Load the baseline values, and split by priority of data:
    # exact model+scenario match > scenario-wide median across models >
    # global scenario-wide median.
    df = metacsv.read_csv(files.sharedpath(baseline_filepath))
    self.df_baseline_this = df.loc[(df.model == iam) & (df.scenario == ssp)
                                   & (df.year == baseline_year)]
    self.df_baseline_anyiam = df.loc[(df.scenario == ssp) & (
        df.year == baseline_year)].groupby('iso').median()
    self.baseline_global = df.loc[(df.scenario == ssp)
                                  & (df.year == baseline_year)].median()

    # Load the growth rates, and split by priority of data
    df_growth = metacsv.read_csv(files.sharedpath(growth_filepath))
    # growth data is on a 5-year grid; index relative to the baseline year
    df_growth['yearindex'] = np.int_((df_growth.year - baseline_year) / 5)
    self.df_growth_this = df_growth.loc[(df_growth.model == iam)
                                        & (df_growth.scenario == ssp)]
    self.df_growth_anyiam = df_growth.loc[(
        df_growth.scenario == ssp)].groupby(['iso', 'year']).median()
    self.growth_global = df_growth.loc[(df_growth.scenario == ssp)
                                       & (df_growth.model == iam)].groupby(
        ['year']).median()

    # Load the nightlights
    self.df_nightlights = metacsv.read_csv(
        files.sharedpath(nightlights_filepath))

    # Cache for ISO-level GDPpc series
    self.cached_iso_gdppcs = {}
def test_assertions(setup_env):
    """read_csv's assertions= argument validates attrs and variables on load."""
    fp = os.path.join(testdata_prefix, 'test7.csv')

    # Assertion against a literal attribute value
    metacsv.read_csv(fp, parse_vars=True,
                     assertions={'attrs': {'version': 'test5.2016-05-01.01'}})

    # Assertion via a callable predicate
    metacsv.read_csv(fp, parse_vars=True,
                     assertions={'attrs': {'version': lambda v: v > 'test5.2016-05-01.00'}})

    # Assertion on a variable's parsed metadata
    metacsv.read_csv(fp, parse_vars=True,
                     assertions={'variables': {'col2': {'unit': 'digits'}}})
def retrieve(cls, archive_name, api=None):
    """Fetch an archive's contents by name, memoizing results in cls._data.

    CSV archives are read with metacsv; any extension beginning with
    '.nc' is loaded as an xarray Dataset. Other extensions raise
    ValueError.
    """
    # Serve from the class-level cache when possible
    if archive_name in cls._data:
        return cls._data[archive_name]

    # Lazily construct and memoize a shared datafs API handle
    if api is None:
        if cls._api is None:
            cls._api = datafs.get_api()
        api = cls._api

    archive = api.get_archive(archive_name)
    extension = os.path.splitext(archive_name)[-1]

    if archive_name.endswith('.csv'):
        with archive.open() as f:
            data = metacsv.read_csv(f)
    elif extension[:3] == '.nc':
        # matches .nc as well as variants like .nc4
        with archive.get_local_path() as fp:
            data = xr.open_dataset(fp).load()
    else:
        raise ValueError(
            'file type not recognized: "{}"'.format(archive_name))

    cls._data[archive_name] = data
    return data
def test_attr_updating(self):
    """Coords survive deletion/reassignment and slicing to a Series."""
    df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))

    # Make 'ind3' depend on a new virtual coordinate 's2'
    df.coords.update({'ind3': ['s2'], 's2': None})
    saved_coords = df.coords

    # Send to xarray.Dataset
    ds = df.to_xarray()

    del df.coords

    # Create a similarly indexed series by applying the saved coords
    # after the slice operation
    series = df['col1']
    series.coords = saved_coords

    # Send to xarray.DataArray
    da = series.to_xarray()
    self.assertTrue((ds.col1 == da).all().all())

    # add_coords must work from a fresh RangeIndex and from a MultiIndex
    df = metacsv.DataFrame(np.random.random((3, 4)))
    df.add_coords()
    del df.coords
    df.index = pd.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])
    df.add_coords()
def get_pop():
    """Assemble a region-level population panel for years after 2005.

    Combines the merged population baselines with rows synthesized by
    get_missing_pop() for regions absent from the merged file, sorted by
    region and year.

    Returns:
        pandas.DataFrame with at least 'region', 'year', 'pop', 'iso'.
    """
    # LandScan gridded population aggregated to GCP regions; the CSV has a
    # 17-row preamble before the header line
    lspop = pd.read_csv(
        files.sharedpath('social/processed/LandScan2011/gcpregions.csv'),
        header=17)

    # Terminal (leaf) regions from the GCP region hierarchy, trimmed to the
    # columns needed downstream
    dfir = metacsv.read_csv(files.sharedpath("regions/hierarchy_metacsv.csv"))
    dfir = dfir.loc[dfir.is_terminal].reset_index(drop=True)
    dfir.drop([
        'parent-key', 'name', 'alternatives', 'is_terminal', 'gadmid',
        'agglomid', 'notes'
    ], axis=1, inplace=True)
    dfir.rename(columns={'region-key': 'region'}, inplace=True)
    # first 3 characters of a region key are its ISO country code
    dfir['iso'] = dfir.region.apply(lambda x: x[:3])

    df = pd.read_csv(
        files.sharedpath(
            'social/baselines/population/merged/population-merged.all.csv'))
    df.rename(columns={'value': 'pop'}, inplace=True)
    df['iso'] = df.region.apply(lambda x: x[:3])
    df.drop(['index'], axis=1, inplace=True)
    df = df.loc[df.year > 2005]

    # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # 2.0; pd.concat is the supported equivalent of append(..., sort=False).
    dff = pd.concat(
        [df, get_missing_pop(df, dfir, lspop)],
        sort=False).sort_values(['region', 'year']).reset_index(drop=True)

    return dff
def get_gdppc_all_regions(model, ssp):
    """Build a long-format GDPpc table (2010-2100) for every terminal region."""
    print(model + " " + ssp)
    os.chdir(os.getenv("HOME") + '/repos/impact-calculations')

    # map short IAM labels to the model names used in the output table
    model_names = {'high': 'OECD Env-Growth', 'low': 'IIASA GDP'}
    hierarchy = metacsv.read_csv(
        files.sharedpath("regions/hierarchy_metacsv.csv"))

    years = np.arange(2010, 2101)
    ssp_column = np.repeat(ssp, years.size)
    model_column = np.repeat(model_names[model], years.size)

    provider = gdppc.GDPpcProvider(model, ssp)

    # One frame per terminal region, concatenated at the end
    frames = []
    for ii in np.where(hierarchy.is_terminal)[0]:
        region_key = hierarchy['region-key'][ii]
        series = provider.get_timeseries(region_key)
        frames.append(pd.DataFrame({
            'region': np.repeat(region_key, years.size),
            'year': years,
            'gdppc': series,
            'ssp': ssp_column,
            'model': model_column
        }))

    return pd.concat(frames, sort=False)
def test_attr_updating(setup_env):
    """Coords survive deletion/reassignment and slicing to a Series."""
    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))

    # Make 'ind3' depend on a new virtual coordinate 's2'
    df.coords.update({'ind3': ['s2'], 's2': None})
    saved_coords = df.coords

    # Send to xarray.Dataset
    ds = df.to_xarray()

    del df.coords

    # Create a similarly indexed series by applying the saved coords
    # after the slice operation
    series = df['col1']
    series.coords = saved_coords

    # Send to xarray.DataArray
    da = series.to_xarray()
    assert (ds.col1 == da).all().all()

    # add_coords must work from a fresh RangeIndex and from a MultiIndex
    df = metacsv.DataFrame(np.random.random((3, 4)))
    df.add_coords()
    del df.coords
    df.index = pd.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])
    df.add_coords()
def test_series_conversion_to_xarray(self):
    '''CSV Test 5: Check conversion of metacsv.Series to xarray.DataArray'''
    series = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test5.csv'), squeeze=True)

    # squeeze=True yields a 1-D object whose xarray twin matches exactly
    self.assertEqual(len(series.shape), 1)
    self.assertEqual(series.to_xarray().shape, series.shape)
    self.assertTrue((series.to_xarray().values == series.values).all())
def test_series_conversion_to_xarray(setup_env):
    '''CSV Test 5: Check conversion of metacsv.Series to xarray.DataArray'''
    series = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test5.csv'), squeeze=True)

    # squeeze=True yields a 1-D object whose xarray twin matches exactly
    assert len(series.shape) == 1
    assert series.to_xarray().shape == series.shape
    assert (series.to_xarray().values == series.values).all()
def test_parse_vars(setup_env):
    """parse_vars=True attaches description/unit metadata to each column."""
    df = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test8.csv'),
        parse_vars=True,
        index_col=[0, 1, 2],
        coords={'ind1': None, 'ind2': None, 'ind3': ['ind2']})

    # NOTE(review): df.hasattr mirrors the unittest twin of this test —
    # confirm metacsv defines it (the builtin equivalent would be hasattr()).
    assert df.hasattr(df.variables['col1'], 'description')

    # BUG FIX: these were bare comparison expressions with no effect;
    # without `assert` they silently tested nothing.
    assert df.variables['col1']['description'] == 'The first column'
    assert df.variables['col2']['unit'] == 'digits'
def test_parse_vars(self):
    """parse_vars=True should carry description/unit into xarray attrs."""
    df = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test7.csv'),
        parse_vars=True, index_col=0)
    ds = df.to_xarray()

    expected = {
        'col1': ('The first column', 'wigits'),
        'col2': ('The second column', 'digits'),
    }
    for name, (description, unit) in expected.items():
        self.assertEqual(ds[name].attrs['description'], description)
        self.assertEqual(ds[name].attrs['unit'], unit)
def test_parse_vars(setup_env):
    """parse_vars=True should carry description/unit into xarray attrs."""
    df = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test7.csv'),
        parse_vars=True, index_col=0)
    ds = df.to_xarray()

    expected = {
        'col1': ('The first column', 'wigits'),
        'col2': ('The second column', 'digits'),
    }
    for name, (description, unit) in expected.items():
        assert ds[name].attrs['description'] == description
        assert ds[name].attrs['unit'] == unit
def test_parse_vars(self):
    """parse_vars=True attaches description/unit metadata to each column."""
    df = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test8.csv'),
        parse_vars=True,
        index_col=[0, 1, 2],
        coords={'ind1': None, 'ind2': None, 'ind3': ['ind2']})

    parsed = df.variables
    self.assertTrue(df.hasattr(parsed['col1'], 'description'))
    self.assertEqual(parsed['col1']['description'], 'The first column')
    self.assertEqual(parsed['col2']['unit'], 'digits')
def test_coordinate_conversion_to_xarray(self):
    '''CSV Test 2: Make sure only base coordinates are used in determining xarray dimensionality'''
    df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))
    repr(df)  # smoke-test the string representation

    # No NaNs should appear when non-base coords are kept out of the dims
    nulls = df.to_xarray().isnull().sum()
    self.assertEqual(nulls.col1, 0)
    self.assertEqual(nulls.col2, 0)

    # Test automatic coords assignment
    df = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test5.csv'), squeeze=True)
    del df.coords
    ds = df.to_xarray()

    # Without coords the 1-D series unstacks into a wider dataset
    self.assertNotEqual(len(df.shape), len(ds.shape))
    self.assertEqual(df.shape[0], ds.shape[0])
    self.assertTrue(ds.shape[1] > 1)
def test_coordinate_conversion_to_xarray(setup_env):
    '''CSV Test 2: Make sure only base coordinates are used in determining xarray dimensionality'''
    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))
    repr(df)  # smoke-test the string representation

    # No NaNs should appear when non-base coords are kept out of the dims
    nulls = df.to_xarray().isnull().sum()
    assert nulls.col1 == 0
    assert nulls.col2 == 0

    # Test automatic coords assignment
    df = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test5.csv'), squeeze=True)
    del df.coords
    ds = df.to_xarray()

    # Without coords the 1-D series unstacks into a wider dataset
    assert len(df.shape) != len(ds.shape)
    assert df.shape[0] == ds.shape[0]
    assert ds.shape[1] > 1
def test_read_csv(self):
    """CSV Test 1: Check DataFrame data for CSVs with and without yaml headers"""
    with_header = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test1.csv'))
    without_header = pd.read_csv(
        os.path.join(self.testdata_prefix, 'test2.csv'))

    # smoke-test both representations
    repr(with_header)
    repr(without_header)

    # Identical data once the plain CSV is indexed the same way
    self.assertTrue(
        (with_header.values == without_header.set_index('ind').values)
        .all().all())
def test_read_csv(setup_env):
    """CSV Test 1: Check DataFrame data for CSVs with and without yaml headers"""
    with_header = metacsv.read_csv(os.path.join(testdata_prefix, 'test1.csv'))
    without_header = pd.read_csv(os.path.join(testdata_prefix, 'test2.csv'))

    # smoke-test both representations
    repr(with_header)
    repr(without_header)

    # Identical data once the plain CSV is indexed the same way
    assert (with_header.values ==
            without_header.set_index('ind').values).all().all()
def test_for_series_attributes(setup_env):
    '''CSV Test 3: Ensure read_csv preserves attrs with squeeze=True conversion to Series

    This test is incomplete - a complete test would check that attrs are preserved
    when index_col is not set and the index is set by coords. Currently, this
    does not work.
    '''
    series = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test5.csv'),
        squeeze=True, index_col=[0, 1])
    repr(series)  # smoke-test the representation

    assert hasattr(series, 'attrs')
    assert 'author' in series.attrs
    assert series.attrs['author'] == 'series creator'
def test_header_writer(self):
    """to_header + read_csv(header_file=...) must round-trip metadata."""
    fp = os.path.join(self.testdata_prefix, 'test9.csv')
    header_path = os.path.join(self.test_tmp_prefix, 'test_header.header')

    attrs = {'author': 'test author', 'contact': '*****@*****.**'}
    coords = {'ind1': None, 'ind2': None, 'ind3': 'ind2'}
    variables = {
        'col1': {'description': 'my first column'},
        'col2': {'description': 'my second column'},
    }

    # Write the metadata as a standalone header, then read the CSV with it
    metacsv.to_header(
        header_path, attrs=attrs, coords=coords, variables=variables)
    df = metacsv.read_csv(fp, header_file=header_path)

    self.assertEqual(df.attrs, attrs)
    self.assertEqual(df.coords, coords)
    self.assertEqual(df.variables, variables)
def test_for_series_attributes(self):
    '''CSV Test 3: Ensure read_csv preserves attrs with squeeze=True conversion to Series

    This test is incomplete - a complete test would check that attrs are preserved
    when index_col is not set and the index is set by coords. Currently, this
    does not work.
    '''
    series = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test5.csv'),
        squeeze=True, index_col=[0, 1])
    repr(series)  # smoke-test the representation

    self.assertTrue(hasattr(series, 'attrs'))
    self.assertIn('author', series.attrs)
    self.assertEqual(series.attrs['author'], 'series creator')
def test_header_writer(setup_env):
    """to_header + read_csv(header_file=...) must round-trip metadata."""
    fp = os.path.join(testdata_prefix, 'test9.csv')
    header_path = os.path.join(test_tmp_prefix, 'test_header.header')

    attrs = {'author': 'test author', 'contact': '*****@*****.**'}
    coords = {'ind1': None, 'ind2': None, 'ind3': 'ind2'}
    variables = {
        'col1': {'description': 'my first column'},
        'col2': {'description': 'my second column'},
    }

    # Write the metadata as a standalone header, then read the CSV with it
    metacsv.to_header(
        header_path, attrs=attrs, coords=coords, variables=variables)
    df = metacsv.read_csv(fp, header_file=header_path)

    assert df.attrs == attrs
    assert df.coords == coords
    assert df.variables == variables
def test_change_dims(self):
    """Coords survive every constructor transition between container dims."""
    df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))

    # DataFrame._constructor_sliced: column slice keeps coords
    column = df[df.columns[0]]
    self.assertTrue(hasattr(column, 'coords'))

    # Series._constructor_expanddims: rebuilding a frame keeps coords
    rebuilt = metacsv.DataFrame({df.columns[0]: column})
    self.assertTrue(hasattr(rebuilt, 'coords'))

    # DataFrame._constructor_expanddims: wrapping in a Panel keeps coords
    panel = metacsv.Panel({'df': df})
    self.assertTrue(hasattr(panel, 'coords'))

    # Panel._constructor_sliced: slicing the Panel keeps coords
    sliced = panel['df']
    self.assertTrue(hasattr(sliced, 'coords'))
def test_change_dims(setup_env):
    """Coords survive every constructor transition between container dims."""
    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))

    # DataFrame._constructor_sliced: column slice keeps coords
    column = df[df.columns[0]]
    assert hasattr(column, 'coords')

    # Series._constructor_expanddims: rebuilding a frame keeps coords
    rebuilt = metacsv.DataFrame({df.columns[0]: column})
    assert hasattr(rebuilt, 'coords')

    # DataFrame._constructor_expanddims: wrapping in a Panel keeps coords
    panel = metacsv.Panel({'df': df})
    assert hasattr(panel, 'coords')

    # Panel._constructor_sliced: slicing the Panel keeps coords
    sliced = panel['df']
    assert hasattr(sliced, 'coords')
def test_command_line_converter(self):
    """The convert script's netcdf output must match an in-process read."""
    import sys  # local import: file's top-of-file imports are not visible here

    convert_script = 'metacsv.scripts.convert'
    testfile = os.path.join(self.testdata_prefix, 'test6.csv')
    newname = os.path.splitext(os.path.basename(testfile))[0] + '.nc'
    outfile = os.path.join(self.test_tmp_prefix, newname)

    # BUG FIX: use the running interpreter rather than whatever "python"
    # resolves to on PATH — they differ inside virtualenvs and CI images.
    p = subprocess.Popen(
        [sys.executable, '-m', convert_script, 'netcdf', testfile, outfile],
        stderr=subprocess.PIPE, stdout=subprocess.PIPE)

    out, err = p.communicate()
    if len(err.strip()) != 0:
        print(err.strip().decode(locale.getpreferredencoding()))
    self.assertEqual(len(err.strip()), 0)

    # Compare the script's netcdf output against an in-process conversion
    df = metacsv.read_csv(testfile)
    with xr.open_dataset(outfile) as ds:
        self.assertTrue((abs(df.values - ds.to_dataframe().set_index(
            [i for i in df.coords if i not in df.base_coords]).values)
            < 1e-7).all().all())
def test_command_line_converter(setup_env):
    """The convert script's netcdf output must match an in-process read."""
    import sys  # local import: file's top-of-file imports are not visible here

    convert_script = 'metacsv.scripts.convert'
    testfile = os.path.join(testdata_prefix, 'test6.csv')
    newname = os.path.splitext(os.path.basename(testfile))[0] + '.nc'
    outfile = os.path.join(test_tmp_prefix, newname)

    # BUG FIX: use the running interpreter rather than whatever "python"
    # resolves to on PATH — they differ inside virtualenvs and CI images.
    p = subprocess.Popen(
        [sys.executable, '-m', convert_script, 'netcdf', testfile, outfile],
        stderr=subprocess.PIPE, stdout=subprocess.PIPE)

    out, err = p.communicate()
    if len(err.strip()) != 0:
        print(err.strip().decode(locale.getpreferredencoding()))
    assert len(err.strip()) == 0

    # Compare the script's netcdf output against an in-process conversion
    df = metacsv.read_csv(testfile)
    with xr.open_dataset(outfile) as ds:
        assert (abs(df.values - ds.to_dataframe().set_index(
            [i for i in df.coords if i not in df.base_coords]).values)
            < 1e-7).all().all()
def test_command_line_version_check(self):
    """The version script errors on unversioned files and echoes versions."""
    import sys  # local import: file's top-of-file imports are not visible here

    def get_version(readfile):
        # Run the version script and return its stdout; raise VersionError
        # if it reports anything on stderr.
        version_check_script = 'metacsv.scripts.version'
        # BUG FIX: use the running interpreter rather than whatever
        # "python" resolves to on PATH (virtualenv / CI mismatch).
        p = subprocess.Popen(
            [sys.executable, '-m', version_check_script, readfile],
            stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        out, err = p.communicate()
        if len(err) != 0:
            raise VersionError(err.strip())
        return out.strip().decode(locale.getpreferredencoding())

    # test6.csv declares no version -> the script must fail
    testfile = os.path.join(self.testdata_prefix, 'test6.csv')
    with self.assertRaises(VersionError):
        get_version(testfile)

    # test5.csv declares a version; the script output must match it
    testfile = os.path.join(self.testdata_prefix, 'test5.csv')
    df = metacsv.read_csv(testfile)
    self.assertEqual(get_version(testfile), df.attrs['version'])
def test_command_line_version_check(setup_env):
    """The version script errors on unversioned files and echoes versions."""
    import sys  # local import: file's top-of-file imports are not visible here

    def get_version(readfile):
        # Run the version script and return its stdout; raise VersionError
        # if it reports anything on stderr.
        version_check_script = 'metacsv.scripts.version'
        # BUG FIX: use the running interpreter rather than whatever
        # "python" resolves to on PATH (virtualenv / CI mismatch).
        p = subprocess.Popen(
            [sys.executable, '-m', version_check_script, readfile],
            stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        out, err = p.communicate()
        if len(err) != 0:
            raise VersionError(err.strip())
        return out.strip().decode(locale.getpreferredencoding())

    # test6.csv declares no version -> the script must fail
    testfile = os.path.join(testdata_prefix, 'test6.csv')
    with pytest.raises(VersionError):
        get_version(testfile)

    # test5.csv declares a version; the script output must match it
    testfile = os.path.join(testdata_prefix, 'test5.csv')
    df = metacsv.read_csv(testfile)
    assert get_version(testfile) == df.attrs['version']
def test_xarray_variable_attribute_persistence(setup_env):
    """Per-variable units declared in the header survive xarray conversion."""
    testfile = os.path.join(testdata_prefix, 'test6.csv')
    ds = metacsv.read_csv(testfile).to_xarray()
    assert ds.col1.attrs['unit'] == 'wigits'
from imperact.lib.errors import UserException template_root = 'imperact' CUSTOM_CONTENT_TYPE = 'image/png' import imperact as aggregator else: from aggregator.lib.base import BaseController from aggregator.lib.errors import UserException template_root = 'aggregator' from tg.controllers import CUSTOM_CONTENT_TYPE import aggregator directory_root = '/shares/gcp/outputs' scripts_root = '/home/jrising/research/gcp/imperactive/scripts' last_purge = time.mktime(datetime.datetime(2017, 7, 20, 0, 0, 0).timetuple()) hierarchy = metacsv.read_csv("/shares/gcp/regions/hierarchy_metacsv.csv") irlevel_keys = np.concatenate( (hierarchy['region-key'][hierarchy['is_terminal']], hierarchy['region-key'][hierarchy['is_terminal']], hierarchy['region-key'][(hierarchy['alternatives'].astype('S') != 'nan') & (hierarchy['is_terminal'])])).astype('S') irlevel_labels = np.concatenate( (hierarchy['region-key'][hierarchy['is_terminal']], hierarchy['name'][hierarchy['is_terminal']], hierarchy['alternatives'][(hierarchy['alternatives'].astype('S') != 'nan') & (hierarchy['is_terminal'])])).astype('S') irlevel_search = np.core.defchararray.lower(irlevel_labels) aggregated_keys = np.concatenate( (['',
def get_version(readfile):
    """Return the 'version' attribute of a metacsv file, or None if absent."""
    return metacsv.read_csv(readfile).attrs.get('version', None)
def test_xarray_variable_attribute_persistence(self):
    """Per-variable units declared in the header survive xarray conversion."""
    testfile = os.path.join(self.testdata_prefix, 'test6.csv')
    # BUG FIX: assertTrue(x, 'wigits') treated 'wigits' as the failure
    # message and only checked truthiness — it would pass for ANY non-empty
    # unit. assertEqual actually performs the intended comparison.
    self.assertEqual(
        metacsv.read_csv(testfile).to_xarray().col1.attrs['unit'], 'wigits')
def test_standalone_properties(self):
    # Purpose: exercise the Variables/Attributes containers directly,
    # independent of file I/O, then verify a full read round-trip.
    df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test3.csv'))
    df.columns = ['index', 'column1', 'column2']
    df.set_index('index', inplace=True)

    # Wholesale assignment of a Variables container must round-trip exactly
    variables = metacsv.core.internals.Variables({
        'column1': {'units': 'wigits'},
        'column2': {'units': 'wigits'}})
    df.variables = variables
    self.assertEqual(df.variables, variables)
    self.assertEqual(df.variables.__repr__(), variables.__repr__())
    self.assertEqual(df.variables[df.columns[1]]['units'], 'wigits')

    # An empty Attributes container compares equal to None
    attrs = metacsv.core.internals.Attributes()
    self.assertEqual(attrs, None)
    self.assertFalse('author' in attrs.copy())

    # dict-style access on a missing key raises; get/pop accept one default
    # and reject a second extra argument
    with self.assertRaises(KeyError):
        del attrs['author']
    with self.assertRaises(KeyError):
        err = attrs['author']
    with self.assertRaises(KeyError):
        err = attrs.get('author')
    self.assertEqual(attrs.get('author', None), None)
    with self.assertRaises(ValueError):
        err = attrs.get('author', 1, 2)
    self.assertEqual(attrs.pop('author', None), None)
    with self.assertRaises(KeyError):
        err = attrs.pop('author')
    with self.assertRaises(ValueError):
        err = attrs.pop('author', 1, 2)
    self.assertEqual(attrs, None)
    self.assertEqual(attrs.__repr__(), '<Empty Attributes>')

    attrs['author'] = 'My Name'
    attrs['contact'] = '*****@*****.**'
    self.assertEqual(attrs.pop('author', None), 'My Name')
    self.assertEqual(attrs.pop('author', None), None)

    # Merging into a DataFrame's attrs; update with a non-mapping must fail
    df.attrs.update(attrs)
    df.attrs.update({'project': 'metacsv'})
    with self.assertRaises(TypeError):
        df.attrs.update(1)
    self.assertNotEqual(df.attrs, attrs)
    del df.attrs['project']
    self.assertEqual(df.attrs, attrs)
    self.assertEqual(df.attrs['contact'], '*****@*****.**')
    self.assertEqual(df.attrs.get('contact'), '*****@*****.**')
    self.assertEqual(df.attrs.get('other', 'thing'), 'thing')
    self.assertEqual(df.attrs.pop('contact'), '*****@*****.**')
    self.assertEqual(df.attrs.pop('contact', 'nope'), 'nope')
    self.assertNotEqual(df.attrs, attrs)

    # An Attributes object may be assigned as a single variable's metadata
    attrs['author'] = 'My Name'
    df.variables['column1'] = attrs
    self.assertEqual(df.variables['column1']['author'], 'My Name')

    # parse_string_var splits "name [unit]" declarations; non-matching
    # strings pass through unchanged and non-strings raise TypeError
    var = df.variables.copy()
    self.assertEqual(df.variables, var)
    with self.assertRaises(TypeError):
        var.parse_string_var(['unit'])
    self.assertTrue('description' in var.parse_string_var('variable name [unit]'))
    self.assertEqual(var.parse_string_var('variable [ name'), 'variable [ name')
    with self.assertRaises(TypeError):
        df.variables = []
    del df.variables

    # Test round trip
    df2 = metacsv.read_csv(
        os.path.join(self.testdata_prefix, 'test3.csv'),
        index_col=[0], skiprows=1,
        names=['column1', 'column2'])
    df2.index.names = ['index']
    self.assertTrue((df == df2).all().all())
def test_converters(setup_env):
    # Purpose: exercise every to_* conversion entry point (csv, dataarray,
    # xarray/dataset, netcdf) and check metadata survives each round-trip.
    tmpfile = os.path.join(test_tmp_prefix, 'test_write_1.csv')
    tmpnc = os.path.join(test_tmp_prefix, 'test_write_1.nc')

    df = pd.DataFrame(np.random.random((3,4)), columns=list('abcd'))
    df.index.names = ['ind']
    attrs = {'author': 'My Name'}

    # Convert the same plain DataFrame four different ways
    metacsv.to_csv(df, tmpfile, attrs=attrs, coords={'ind': None})
    da = metacsv.to_dataarray(df, attrs=attrs, coords={'ind': None})
    ds1 = metacsv.to_xarray(df, attrs=attrs, coords={'ind': None})
    ds2 = metacsv.to_dataset(df, attrs=attrs, coords={'ind': None})

    df2 = metacsv.DataFrame(df, attrs=attrs)
    df2.add_coords()
    df3 = metacsv.read_csv(tmpfile)

    assert df2.coords == df3.coords
    assert ((ds1 == ds2).all().all())
    # the dataarray stacks the frame, so only the total size is comparable
    assert df.shape[0]*df.shape[1] == da.shape[0]*da.shape[1]

    attrs = metacsv.core.internals.Attributes()
    attrs.update(da.attrs)
    assert df2.attrs == attrs

    # Conversions from a file with a real yaml header
    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))
    ds = df.to_xarray()
    da = df.to_dataarray()
    assert not ds.col2.isnull().any()

    # Rebuild a Series by stacking, re-attach metadata, and convert it
    attrs = df.attrs.copy()
    coords = df.coords.copy()
    variables = df.variables.copy()
    df.columns.names = ['cols']
    s = df.stack('cols')
    metacsv.to_csv(s, tmpfile, attrs={'author': 'my name'})
    s = metacsv.Series(s)
    coords.update({'cols': None})
    s.attrs = attrs
    s.coords = coords
    s.variables = variables
    s.to_xarray()
    s.to_dataarray()
    s.to_dataset()

    # Unsupported input types are rejected
    with pytest.raises(TypeError):
        metacsv.to_xarray(['a','b','c'])

    # File-path-based conversions with attribute injection
    metacsv.to_csv(
        os.path.join(testdata_prefix, 'test6.csv'),
        tmpfile,
        attrs={'author': 'test author'},
        variables={'col1': {'unit': 'digits'}})
    df = metacsv.read_csv(tmpfile)
    assert (df.attrs['author'] == 'test author')
    ds = metacsv.to_xarray(tmpfile)
    assert ds.col1.attrs['unit'] == 'digits'
    metacsv.to_netcdf(tmpfile, tmpnc)
    with xr.open_dataset(tmpnc) as ds:
        assert (ds.col1.attrs['unit'] == 'digits')
def test_converters(self):
    # Purpose: exercise every to_* conversion entry point (csv, dataarray,
    # xarray/dataset, netcdf) and check metadata survives each round-trip.
    tmpfile = os.path.join(self.test_tmp_prefix, 'test_write_1.csv')
    tmpnc = os.path.join(self.test_tmp_prefix, 'test_write_1.nc')

    df = pd.DataFrame(np.random.random((3,4)), columns=list('abcd'))
    df.index.names = ['ind']
    attrs = {'author': 'My Name'}

    # Convert the same plain DataFrame four different ways
    metacsv.to_csv(df, tmpfile, attrs=attrs, coords={'ind': None})
    da = metacsv.to_dataarray(df, attrs=attrs, coords={'ind': None})
    ds1 = metacsv.to_xarray(df, attrs=attrs, coords={'ind': None})
    ds2 = metacsv.to_dataset(df, attrs=attrs, coords={'ind': None})

    df2 = metacsv.DataFrame(df, attrs=attrs)
    df2.add_coords()
    df3 = metacsv.read_csv(tmpfile)

    self.assertEqual(df2.coords, df3.coords)
    self.assertTrue((ds1 == ds2).all().all())
    # the dataarray stacks the frame, so only the total size is comparable
    self.assertEqual(df.shape[0]*df.shape[1], da.shape[0]*da.shape[1])

    attrs = metacsv.core.internals.Attributes()
    attrs.update(da.attrs)
    self.assertEqual(df2.attrs, attrs)

    # Conversions from a file with a real yaml header
    df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))
    ds = df.to_xarray()
    da = df.to_dataarray()
    self.assertFalse(ds.col2.isnull().any())

    # Rebuild a Series by stacking, re-attach metadata, and convert it
    attrs = df.attrs.copy()
    coords = df.coords.copy()
    variables = df.variables.copy()
    df.columns.names = ['cols']
    s = df.stack('cols')
    metacsv.to_csv(s, tmpfile, attrs={'author': 'my name'})
    s = metacsv.Series(s)
    coords.update({'cols': None})
    s.attrs = attrs
    s.coords = coords
    s.variables = variables
    s.to_xarray()
    s.to_dataarray()
    s.to_dataset()

    # Unsupported input types are rejected
    with self.assertRaises(TypeError):
        metacsv.to_xarray(['a','b','c'])

    # File-path-based conversions with attribute injection
    metacsv.to_csv(
        os.path.join(self.testdata_prefix, 'test6.csv'),
        tmpfile,
        attrs={'author': 'test author'},
        variables={'col1': {'unit': 'digits'}})
    df = metacsv.read_csv(tmpfile)
    self.assertEqual(df.attrs['author'], 'test author')
    ds = metacsv.to_xarray(tmpfile)
    self.assertEqual(ds.col1.attrs['unit'], 'digits')
    metacsv.to_netcdf(tmpfile, tmpnc)
    with xr.open_dataset(tmpnc) as ds:
        self.assertEqual(ds.col1.attrs['unit'], 'digits')
def test_standalone_properties(setup_env):
    # Purpose: exercise the Variables/Attributes containers directly,
    # independent of file I/O, then verify a full read round-trip.
    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test3.csv'))
    df.columns = ['index', 'column1', 'column2']
    df.set_index('index', inplace=True)

    # Wholesale assignment of a Variables container must round-trip exactly
    variables = metacsv.core.internals.Variables({
        'column1': {'units': 'wigits'},
        'column2': {'units': 'wigits'}})
    df.variables = variables
    assert df.variables == variables
    assert df.variables.__repr__() == variables.__repr__()
    assert df.variables[df.columns[1]]['units'] == 'wigits'

    # An empty Attributes container compares equal to None
    attrs = metacsv.core.internals.Attributes()
    assert attrs == None
    assert not 'author' in attrs.copy()

    # dict-style access on a missing key raises; get/pop accept one default
    # and reject a second extra argument
    with pytest.raises(KeyError):
        del attrs['author']
    with pytest.raises(KeyError):
        err = attrs['author']
    with pytest.raises(KeyError):
        err = attrs.get('author')
    assert attrs.get('author', None) == None
    with pytest.raises(ValueError):
        err = attrs.get('author', 1, 2)
    assert attrs.pop('author', None) == None
    with pytest.raises(KeyError):
        err = attrs.pop('author')
    with pytest.raises(ValueError):
        err = attrs.pop('author', 1, 2)
    assert attrs == None
    assert attrs.__repr__() == '<Empty Attributes>'

    attrs['author'] = 'My Name'
    attrs['contact'] = '*****@*****.**'
    assert attrs.pop('author', None) == 'My Name'
    assert attrs.pop('author', None) == None

    # Merging into a DataFrame's attrs; update with a non-mapping must fail
    df.attrs.update(attrs)
    df.attrs.update({'project': 'metacsv'})
    with pytest.raises(TypeError):
        df.attrs.update(1)
    assert not df.attrs == attrs
    del df.attrs['project']
    assert df.attrs == attrs
    assert df.attrs['contact'] == '*****@*****.**'
    assert df.attrs.get('contact') == '*****@*****.**'
    assert df.attrs.get('other', 'thing') == 'thing'
    assert df.attrs.pop('contact') == '*****@*****.**'
    assert df.attrs.pop('contact', 'nope') == 'nope'
    assert df.attrs != attrs

    # An Attributes object may be assigned as a single variable's metadata
    attrs['author'] = 'My Name'
    df.variables['column1'] = attrs
    assert df.variables['column1']['author'] == 'My Name'

    # parse_string_var splits "name [unit]" declarations; non-matching
    # strings pass through unchanged and non-strings raise TypeError
    var = df.variables.copy()
    assert df.variables == var
    with pytest.raises(TypeError):
        var.parse_string_var(['unit'])
    assert ('description' in var.parse_string_var('variable name [unit]'))
    assert var.parse_string_var('variable [ name') == 'variable [ name'
    with pytest.raises(TypeError):
        df.variables = []
    del df.variables

    # Test round trip
    df2 = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test3.csv'),
        index_col=[0], skiprows=1,
        names=['column1', 'column2'])
    df2.index.names = ['index']
    assert ((df == df2).all().all())
if df.shape[0] > 0: return df if iso in df_anyiam.index: df = df_anyiam.loc[iso] if df.shape[0] > 0: return df return df_global if __name__ == '__main__': # Test the provider import time time0 = time.time() provider = GDPpcProvider('low', 'SSP3') df = metacsv.read_csv(files.sharedpath("regions/hierarchy_metacsv.csv")) time1 = time.time() print "Load time: %s seconds" % (time1 - time0) for ii in np.where(df.is_terminal)[0]: xx = provider.get_timeseries(df.iloc[ii, 0]) time2 = time.time() print "First pass: %s seconds" % (time2 - time1) for ii in np.where(df.is_terminal)[0]: xx = provider.get_timeseries(df.iloc[ii, 0]) time3 = time.time() print "Second pass: %s seconds" % (time3 - time2)