def test_squeeze(self):
    """Compare ``Dataset.squeeze`` with squeezing every variable by hand.

    The fixture variable ``foo`` has dims ``('x', 'y', 'z')`` and data
    ``[[[1], [2]]]``, i.e. length 1 along ``x`` and ``z`` and length 2
    along ``y``.  Squeezing is tried with no arguments, with ``['x']``
    and with ``['x', 'z']``; squeezing ``'y'`` must raise ``ValueError``.
    """
    data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])})

    for args in [[], [['x']], [['x', 'z']]]:
        squeezed_vars = {}
        for name, variable in iteritems(data.variables):
            if args:
                # Only ask a variable to squeeze dims it actually has.
                var_args = [set(args[0]) & set(variable.dims)]
            else:
                var_args = []
            squeezed_vars[name] = variable.squeeze(*var_args)

        expected = Dataset(squeezed_vars)
        # Rebuilding from raw variables loses coordinate status; restore it.
        expected.set_coords(data.coords, inplace=True)
        self.assertDatasetIdentical(expected, data.squeeze(*args))

    # 'y' has length 2, so it cannot be squeezed away.
    with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
        data.squeeze('y')
def test_coords_set(self):
    """Exercise ``set_coords`` / ``reset_coords`` on three related datasets.

    The three fixtures hold the same variables ``x``, ``yy`` and ``zzz``
    (all along dimension ``x``) and differ only in which of them are
    passed as coordinates: none explicitly, ``x`` and ``yy``, or all
    three.
    """
    one_coord = Dataset({'x': ('x', [0]),
                         'yy': ('x', [1]),
                         'zzz': ('x', [2])})
    two_coords = Dataset({'zzz': ('x', [2])},
                         {'x': ('x', [0]),
                          'yy': ('x', [1])})
    all_coords = Dataset(coords={'x': ('x', [0]),
                                 'yy': ('x', [1]),
                                 'zzz': ('x', [2])})

    # Marking 'x' as a coordinate leaves one_coord unchanged, whether
    # the name is given as a string or as a list.
    for names in ('x', ['x']):
        self.assertDatasetIdentical(one_coord, one_coord.set_coords(names))

    # Promoting further variables moves between the fixtures.
    self.assertDatasetIdentical(two_coords, one_coord.set_coords('yy'))
    self.assertDatasetIdentical(all_coords,
                                one_coord.set_coords(['yy', 'zzz']))

    # A bare reset_coords() brings every fixture back to one_coord.
    for dataset in (one_coord, two_coords, all_coords):
        self.assertDatasetIdentical(one_coord, dataset.reset_coords())

    # Resetting only selected names.
    self.assertDatasetIdentical(one_coord,
                                all_coords.reset_coords(['yy', 'zzz']))
    self.assertDatasetIdentical(two_coords, all_coords.reset_coords('zzz'))

    # Resetting 'x' is rejected.
    with self.assertRaisesRegexp(ValueError, 'cannot remove index'):
        one_coord.reset_coords('x')

    # With drop=True the variable is removed rather than demoted, which
    # matches dropping 'zzz' from either all_coords or two_coords.
    dropped = all_coords.reset_coords('zzz', drop=True)
    for dataset in (all_coords, two_coords):
        self.assertDatasetIdentical(dataset.drop_vars('zzz'), dropped)
def test_to_and_from_dataframe(self):
    """Round-trip datasets through ``to_dataframe`` / ``from_dataframe``.

    Covers a one-dimensional dataset, the same dataset with a variable
    marked as a coordinate, a two-dimensional dataset that yields a
    ``MultiIndex``, and two degenerate frames (single value, empty).
    """
    # --- one-dimensional case -------------------------------------------
    x = np.random.randn(10)
    y = np.random.randn(10)
    t = list('abcdefghij')
    variables = OrderedDict([('a', ('t', x)),
                             ('b', ('t', y)),
                             ('t', ('t', t))])
    ds = Dataset(variables)

    index_1d = pd.Index(t, name='t')
    expected = pd.DataFrame(np.array([x, y]).T,
                            columns=['a', 'b'],
                            index=index_1d)
    actual = ds.to_dataframe()
    # DataFrame.equals is used so that frame metadata is compared too.
    assert expected.equals(actual), (expected, actual)

    # Marking 'b' as a coordinate must not remove it from the frame.
    actual = ds.set_coords('b').to_dataframe()
    assert expected.equals(actual), (expected, actual)

    # The frame converts back into the dataset it came from.
    self.assertDatasetIdentical(ds, Dataset.from_dataframe(actual))

    # --- two-dimensional case: expect a MultiIndex ----------------------
    w = np.random.randn(2, 3)
    ds = Dataset({'w': (('x', 'y'), w)})
    ds['y'] = ('y', list('abc'))

    x_labels = [0, 0, 0, 1, 1, 1]
    y_labels = ['a', 'b', 'c', 'a', 'b', 'c']
    exp_index = pd.MultiIndex.from_arrays([x_labels, y_labels],
                                          names=['x', 'y'])
    expected = pd.DataFrame(w.reshape(-1), columns=['w'], index=exp_index)
    actual = ds.to_dataframe()
    self.assertTrue(expected.equals(actual))

    # The MultiIndex frame round-trips as well.
    self.assertDatasetIdentical(ds, Dataset.from_dataframe(actual))

    # --- pathological inputs --------------------------------------------
    # A single-value frame with default labels.
    single = pd.DataFrame([1])
    self.assertDatasetIdentical(Dataset({0: ('index', [1])}),
                                Dataset.from_dataframe(single))

    # An empty frame becomes an empty dataset.
    empty = pd.DataFrame()
    self.assertDatasetIdentical(Dataset(), Dataset.from_dataframe(empty))