def test_setitem(self):
    # not sure what else to do here
    series = self.frame['A'][::2]
    self.frame['col5'] = series
    self.assert_('col5' in self.frame)
    common.assert_dict_equal(series, self.frame['col5'],
                             compare_keys=False)

    series = self.frame['A']
    self.frame['col6'] = series
    common.assert_dict_equal(series, self.frame['col6'],
                             compare_keys=False)

    # setting a wrong-length value should raise; a key is passed so that
    # the length check, not the call signature, is what fails
    self.assertRaises(Exception, self.frame.__setitem__, 'col_bad',
                      randn(len(self.frame) + 1))

    # set ndarray
    arr = randn(len(self.frame))
    self.frame['col9'] = arr
    self.assert_((self.frame['col9'] == arr).all())

    # set value, do out of order for DataMatrix
    self.frame['col7'] = 5
    assert (self.frame['col7'] == 5).all()

    self.frame['col8'] = 'foo'
    assert (self.frame['col8'] == 'foo').all()

    smaller = self.frame[:2]
    smaller['col10'] = ['1', '2']
    self.assertEqual(smaller['col10'].dtype, np.object_)
    self.assert_((smaller['col10'] == ['1', '2']).all())

def test__init__featuredata(self, expression_data_no_na,
                            expression_feature_data,
                            expression_feature_rename_col):
    from flotilla.data_model.base import BaseData, \
        subsets_from_metadata, MINIMUM_FEATURE_SUBSET

    base_data = BaseData(expression_data_no_na,
                         feature_data=expression_feature_data,
                         feature_rename_col=expression_feature_rename_col)

    if expression_feature_rename_col is not None:
        feature_renamer_series = expression_feature_data[
            expression_feature_rename_col]
    else:
        feature_renamer_series = pd.Series(
            expression_feature_data.index,
            index=expression_feature_data.index)
    feature_subsets = subsets_from_metadata(expression_feature_data,
                                            MINIMUM_FEATURE_SUBSET,
                                            'features')
    feature_subsets['variant'] = base_data.variant

    pdt.assert_frame_equal(base_data.data_original, expression_data_no_na)
    pdt.assert_frame_equal(base_data.feature_data, expression_feature_data)
    pdt.assert_frame_equal(base_data.data, expression_data_no_na)
    pdt.assert_series_equal(base_data.feature_renamer_series,
                            feature_renamer_series)
    pdt.assert_dict_equal(base_data.feature_subsets, feature_subsets)

def test_groupby_groups_datetimeindex(self):
    # GH#1430
    periods = 1000
    ind = pd.date_range(start='2012/1/1', freq='5min', periods=periods)
    df = DataFrame({'high': np.arange(periods),
                    'low': np.arange(periods)}, index=ind)
    grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))

    # it works!
    groups = grouped.groups
    assert isinstance(list(groups.keys())[0], datetime)

    # GH#11442
    index = pd.date_range('2015/01/01', periods=5, name='date')
    df = pd.DataFrame({'A': [5, 6, 7, 8, 9],
                       'B': [1, 2, 3, 4, 5]}, index=index)
    result = df.groupby(level='date').groups
    dates = ['2015-01-05', '2015-01-04', '2015-01-03',
             '2015-01-02', '2015-01-01']
    expected = {pd.Timestamp(date): pd.DatetimeIndex([date], name='date')
                for date in dates}
    tm.assert_dict_equal(result, expected)

    grouped = df.groupby(level='date')
    for date in dates:
        result = grouped.get_group(date)
        data = [[df.loc[date, 'A'], df.loc[date, 'B']]]
        expected_index = pd.DatetimeIndex([date], name='date')
        expected = pd.DataFrame(data,
                                columns=list('AB'),
                                index=expected_index)
        tm.assert_frame_equal(result, expected)

def test_frame_to_dict_tz(self):
    # GH18372 When converting to dict with orient='records' columns of
    # datetime that are tz-aware were not converted to required arrays
    data = [(datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),),
            (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc),)]
    df = DataFrame(list(data), columns=['d'])

    result = df.to_dict(orient='records')
    expected = [
        {'d': Timestamp('2017-11-18 21:53:00.219225+0000', tz=pytz.utc)},
        {'d': Timestamp('2017-11-18 22:06:30.061810+0000', tz=pytz.utc)},
    ]
    tm.assert_dict_equal(result[0], expected[0])
    tm.assert_dict_equal(result[1], expected[1])

def test_combineFirst(self):
    series = Series(common.makeIntIndex(20).astype(float),
                    index=common.makeIntIndex(20))

    series_copy = series * 2
    series_copy[::2] = np.NaN

    # nothing used from the input
    combined = series.combineFirst(series_copy)
    self.assert_(np.array_equal(combined, series))

    # Holes filled from input
    combined = series_copy.combineFirst(series)
    self.assert_(np.isfinite(combined).all())

    self.assert_(np.array_equal(combined[::2], series[::2]))
    self.assert_(np.array_equal(combined[1::2], series_copy[1::2]))

    # mixed types
    index = common.makeStringIndex(20)
    floats = Series(common.randn(20), index=index)
    strings = Series(common.makeStringIndex(10), index=index[::2])

    combined = strings.combineFirst(floats)

    common.assert_dict_equal(strings, combined, compare_keys=False)
    common.assert_dict_equal(floats[1::2], combined, compare_keys=False)

    # corner case
    s = Series([1., 2, 3], index=[0, 1, 2])
    result = s.combineFirst(Series([], index=[]))
    assert_series_equal(s, result)

def test_to_dict(self):
    test_data = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }
    recons_data = DataFrame(test_data).to_dict()
    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert v2 == recons_data[k][k2]

    recons_data = DataFrame(test_data).to_dict("l")
    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert v2 == recons_data[k][int(k2) - 1]

    recons_data = DataFrame(test_data).to_dict("s")
    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert v2 == recons_data[k][k2]

    recons_data = DataFrame(test_data).to_dict("sp")
    expected_split = {'columns': ['A', 'B'],
                      'index': ['1', '2', '3'],
                      'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
    tm.assert_dict_equal(recons_data, expected_split)

    recons_data = DataFrame(test_data).to_dict("r")
    expected_records = [{'A': 1.0, 'B': '1'},
                        {'A': 2.0, 'B': '2'},
                        {'A': np.nan, 'B': '3'}]
    assert isinstance(recons_data, list)
    assert len(recons_data) == 3
    for l, r in zip(recons_data, expected_records):
        tm.assert_dict_equal(l, r)

    # GH10844
    recons_data = DataFrame(test_data).to_dict("i")
    for k, v in compat.iteritems(test_data):
        for k2, v2 in compat.iteritems(v):
            assert v2 == recons_data[k2][k]

def test_init(self, step, vmax, logbf_thresh):
    from flotilla.compute.splicing import ModalityEstimator, \
        ModalityModel

    estimator = ModalityEstimator(step, vmax, logbf_thresh)

    true_parameters = np.arange(2, vmax + step, step).astype(float)
    true_exclusion = ModalityModel(1, true_parameters)
    true_inclusion = ModalityModel(true_parameters, 1)
    true_middle = ModalityModel(true_parameters + 3, true_parameters + 3)
    true_bimodal = ModalityModel(1 / (true_parameters + 3),
                                 1 / (true_parameters + 3))
    true_one_param_models = {'Psi~1': true_inclusion,
                             'Psi~0': true_exclusion}
    true_two_param_models = {'bimodal': true_bimodal,
                             'middle': true_middle}

    npt.assert_equal(estimator.step, step)
    npt.assert_equal(estimator.vmax, vmax)
    npt.assert_equal(estimator.logbf_thresh, logbf_thresh)
    npt.assert_equal(estimator.parameters, true_parameters)
    npt.assert_equal(estimator.exclusion_model, true_exclusion)
    npt.assert_equal(estimator.inclusion_model, true_inclusion)
    npt.assert_equal(estimator.middle_model, true_middle)
    npt.assert_equal(estimator.bimodal_model, true_bimodal)
    pdt.assert_dict_equal(estimator.one_param_models,
                          true_one_param_models)
    pdt.assert_dict_equal(estimator.two_param_models,
                          true_two_param_models)

def test_dict_complex(self):
    x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j}
    x_rec = self.encode_decode(x)
    tm.assert_dict_equal(x, x_rec)

    for key in x:
        tm.assert_class_equal(x[key], x_rec[key], obj="complex value")

def test_convert_list():
    obj = r('list(a=1, b=2, c=3)')

    converted = convert_robj(obj)
    expected = {'a': [1], 'b': [2], 'c': [3]}

    _test.assert_dict_equal(converted, expected)

def test_dict_numpy_complex(self):
    x = {"foo": np.complex128(1.0 + 1.0j),
         "bar": np.complex128(2.0 + 2.0j)}
    x_rec = self.encode_decode(x)
    tm.assert_dict_equal(x, x_rec)

    for key in x:
        tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128")

def test_convert_nested_list():
    obj = r('list(a=list(foo=1, bar=2))')

    converted = convert_robj(obj)
    expected = {'a': {'foo': [1], 'bar': [2]}}

    _test.assert_dict_equal(converted, expected)

def test_empty_read(_output_dir: str):
    df = MyTfs(plane="X", directory=_output_dir)
    write_tfs(df.get_filename(), df, save_index="NAME")
    df_read = MyTfs(plane="X", directory=_output_dir).read()
    assert_frame_equal(df, df_read, check_exact=False)  # float precision can be an issue
    assert_dict_equal(df.headers, df_read.headers, compare_keys=True)

def test_filled_write(_output_dir: str, _filled_tfs: MyTfs):
    df = _filled_tfs(plane="X", directory=_output_dir)
    df.write()
    assert pathlib.Path(df.get_filename()).is_file()

    df_read = read_tfs(df.get_filename(), index="NAME")
    assert_frame_equal(df, df_read)
    assert_dict_equal(df.headers, df_read.headers, compare_keys=True)

def test_booleanindex(self):
    boolIdx = np.repeat(True, len(self.strIndex)).astype(bool)
    boolIdx[5:30:2] = False

    subIndex = self.strIndex[boolIdx]
    common.assert_dict_equal(tseries.map_indices(subIndex),
                             subIndex.indexMap)

def test_read_dta18(self):
    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')
    expected = DataFrame.from_records(
        [['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
         ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
         ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
         ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
         ['', '', '', 0, 0.3332999, 'option a', 1 / 3.]],
        columns=['Things', 'Cities', 'Unicode_Cities_Strl', 'Ints',
                 'Floats', 'Bytes', 'Longs'])
    expected["Floats"] = expected["Floats"].astype(np.float32)
    for col in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[col], expected[col])

    with StataReader(self.dta22_118) as rdr:
        vl = rdr.variable_labels()
        vl_expected = {u'Unicode_Cities_Strl':
                       u'Here are some strls with Ünicode chars',
                       u'Longs': u'long data',
                       u'Things': u'Here are some things',
                       u'Bytes': u'byte data',
                       u'Ints': u'int data',
                       u'Cities': u'Here are some cities',
                       u'Floats': u'float data'}
        tm.assert_dict_equal(vl, vl_expected)
        self.assertEqual(rdr.data_label, u'This is a Ünicode data label')

def test_single_exon_alternative_events(self, splice_graph, exon1_i,
                                        exon1_name,
                                        mutually_exclusive_events,
                                        skipped_exon_events):
    test = splice_graph.single_exon_alternative_events(exon1_i,
                                                       exon1_name)
    true = {'se': skipped_exon_events, 'mxe': mutually_exclusive_events}
    pdt.assert_dict_equal(test, true)

def test_tfs_read_write_read_pathlib_input(_tfs_file_pathlib: pathlib.Path,
                                           _test_file: str):
    original = read_tfs(_tfs_file_pathlib)
    write_tfs(_test_file, original)
    new = read_tfs(_test_file)
    assert_frame_equal(original, new)
    assert_dict_equal(original.headers, new.headers, compare_keys=True)

def test_large_dataframe(pca_large_dataframe, kwargs):
    from flotilla.visualize.decomposition import DecompositionViz

    dv = DecompositionViz(pca_large_dataframe.reduced_space,
                          pca_large_dataframe.components_,
                          pca_large_dataframe.explained_variance_ratio_,
                          **kwargs)

    x_pc = kwargs['x_pc']
    y_pc = kwargs['y_pc']
    pcs = [x_pc, y_pc]

    true_top_features = set([])
    true_pc_loadings_labels = {}
    true_pc_loadings = {}
    for pc in pcs:
        x = pca_large_dataframe.components_.ix[pc].copy()
        x.sort(ascending=True)
        half_features = int(kwargs['n_top_pc_features'] / 2)
        if len(x) > kwargs['n_top_pc_features']:
            a = x[:half_features]
            b = x[-half_features:]
            labels = np.r_[a.index, b.index]
            true_pc_loadings[pc] = np.r_[a, b]
        else:
            labels = x.index
            true_pc_loadings[pc] = x

        true_pc_loadings_labels[pc] = labels
        true_top_features.update(labels)

    pdt.assert_numpy_array_equal(dv.top_features, true_top_features)
    pdt.assert_dict_equal(dv.pc_loadings_labels, true_pc_loadings_labels)
    pdt.assert_dict_equal(dv.pc_loadings, true_pc_loadings)

def test_init(self, logbf_thresh):
    from anchor import BayesianModalities, ModalityModel
    from anchor.bayesian import ONE_PARAMETER_MODELS, \
        TWO_PARAMETER_MODELS

    estimator = BayesianModalities(
        one_parameter_models=ONE_PARAMETER_MODELS,
        two_parameter_models=TWO_PARAMETER_MODELS,
        logbf_thresh=logbf_thresh)

    true_one_param_models = {k: ModalityModel(**v)
                             for k, v in ONE_PARAMETER_MODELS.items()}
    true_two_param_models = {k: ModalityModel(**v)
                             for k, v in TWO_PARAMETER_MODELS.items()}

    npt.assert_equal(estimator.logbf_thresh, logbf_thresh)
    pdt.assert_dict_equal(estimator.one_param_models,
                          true_one_param_models)
    pdt.assert_dict_equal(estimator.two_param_models,
                          true_two_param_models)

def test_index_groupby(self):
    int_idx = Index(range(6))
    float_idx = Index(np.arange(0, 0.6, 0.1))
    obj_idx = Index("A B C D E F".split())
    dt_idx = pd.date_range("2013-01-01", freq="M", periods=6)

    for idx in [int_idx, float_idx, obj_idx, dt_idx]:
        to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
        tm.assert_dict_equal(idx.groupby(to_groupby),
                             {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]})

        to_groupby = Index([datetime(2011, 11, 1),
                            datetime(2011, 12, 1),
                            pd.NaT,
                            pd.NaT,
                            datetime(2011, 12, 1),
                            datetime(2011, 11, 1)],
                           tz="UTC").values

        ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
        expected = {ex_keys[0]: idx[[0, 5]], ex_keys[1]: idx[[1, 4]]}
        tm.assert_dict_equal(idx.groupby(to_groupby), expected)

def test_combine_first(self):
    values = tm.makeIntIndex(20).values.astype(float)
    series = Series(values, index=tm.makeIntIndex(20))

    series_copy = series * 2
    series_copy[::2] = np.NaN

    # nothing used from the input
    combined = series.combine_first(series_copy)
    self.assert_(np.array_equal(combined, series))

    # Holes filled from input
    combined = series_copy.combine_first(series)
    self.assert_(np.isfinite(combined).all())

    self.assert_(np.array_equal(combined[::2], series[::2]))
    self.assert_(np.array_equal(combined[1::2], series_copy[1::2]))

    # mixed types
    index = tm.makeStringIndex(20)
    floats = Series(tm.randn(20), index=index)
    strings = Series(tm.makeStringIndex(10), index=index[::2])

    combined = strings.combine_first(floats)

    tm.assert_dict_equal(strings, combined, compare_keys=False)
    tm.assert_dict_equal(floats[1::2], combined, compare_keys=False)

    # corner case
    s = Series([1., 2, 3], index=[0, 1, 2])
    result = s.combine_first(Series([], index=[]))
    assert_series_equal(s, result)

def test_001_process(self):
    """Test ISO2 primary_geo; build a date from day, month, year; no
    primary_date; feature qualifies another feature."""

    # Define mixmasta inputs:
    mp = f"inputs{sep}test1_input.json"
    fp = f"inputs{sep}test1_input.csv"
    geo = 'admin2'
    outf = f"outputs{sep}unittests"

    # Process:
    df, dct = mixmasta.process(fp, mp, geo, outf)

    # Load expected output:
    output_df = pd.read_csv(f'outputs{sep}test1_output.csv',
                            index_col=False)
    output_df = mixmasta.optimize_df_types(output_df)
    with open(f'outputs{sep}test1_dict.json') as f:
        output_dict = json.loads(f.read())

    # Sort both data frames and reindex for comparison.
    cols = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
            'lat', 'lng', 'feature', 'value']
    df.sort_values(by=cols, inplace=True)
    output_df.sort_values(by=cols, inplace=True)

    df.reset_index(drop=True, inplace=True)
    output_df.reset_index(drop=True, inplace=True)

    # Assertions
    assert_frame_equal(df, output_df)
    assert_dict_equal(dct, output_dict)

def test_convert_list(self):
    obj = r("list(a=1, b=2, c=3)")

    converted = com.convert_robj(obj)
    expected = {"a": [1], "b": [2], "c": [3]}

    tm.assert_dict_equal(converted, expected)

def test_convert_nested_list(self):
    obj = r("list(a=list(foo=1, bar=2))")

    converted = com.convert_robj(obj)
    expected = {"a": {"foo": [1], "bar": [2]}}

    tm.assert_dict_equal(converted, expected)

def test_shift(self):
    shifted = self.ts.shift(1)
    unshifted = shifted.shift(-1)

    common.assert_dict_equal(unshifted.valid(), self.ts,
                             compare_keys=False)

    offset = datetools.bday
    shifted = self.ts.shift(1, offset=offset)
    unshifted = shifted.shift(-1, offset=offset)

    assert_series_equal(unshifted, self.ts)

    unshifted = self.ts.shift(0, offset=offset)
    assert_series_equal(unshifted, self.ts)

    shifted = self.ts.shift(1, timeRule='WEEKDAY')
    unshifted = shifted.shift(-1, timeRule='WEEKDAY')

    assert_series_equal(unshifted, self.ts)

    # corner case
    unshifted = self.ts.shift(0)
    assert_series_equal(unshifted, self.ts)

def test_006_process(self):
    """Test multi primary_geo, resolve_to_gadm."""

    # Define mixmasta inputs:
    mp = f'inputs{sep}test6_hoa_conflict_input.json'
    fp = f'inputs{sep}test6_hoa_conflict_input.csv'
    geo = 'admin2'
    outf = f'outputs{sep}unittests'

    # Process:
    df, dct = mixmasta.process(fp, mp, geo, outf)

    # Load expected output:
    output_df = pd.read_csv(f'outputs{sep}test6_hoa_conflict_output.csv',
                            index_col=False)
    output_df = mixmasta.optimize_df_types(output_df)
    with open(f'outputs{sep}test6_hoa_conflict_dict.json') as f:
        output_dict = json.loads(f.read())

    # Sort both data frames and reindex for comparison.
    cols = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
            'lat', 'lng', 'feature', 'value']
    df.sort_values(by=cols, inplace=True)
    output_df.sort_values(by=cols, inplace=True)

    df.reset_index(drop=True, inplace=True)
    output_df.reset_index(drop=True, inplace=True)

    # Make the datatypes the same for value/feature and qualifying columns.
    df['value'] = df['value'].astype('str')
    df['feature'] = df['feature'].astype('str')
    output_df['value'] = output_df['value'].astype('str')
    output_df['feature'] = output_df['feature'].astype('str')

    # Assertions
    assert_frame_equal(df, output_df, check_categorical=False)
    assert_dict_equal(dct, output_dict)

def test_save(self, study, tmpdir):
    from flotilla.datapackage import name_to_resource

    study_name = 'test_save'
    study.supplemental.expression_corr = study.expression.data.corr()
    study.save(study_name, flotilla_dir=tmpdir)

    assert len(tmpdir.listdir()) == 1
    save_dir = tmpdir.listdir()[0]

    with open('{}/datapackage.json'.format(save_dir)) as f:
        test_datapackage = json.load(f)

    assert study_name == save_dir.purebasename

    # resource_keys_to_ignore = ('compression', 'format', 'path', 'url')
    keys_from_study = {
        'splicing': [],
        'expression': ['thresh', 'log_base', 'plus_one'],
        'metadata': ['phenotype_order', 'phenotype_to_color',
                     'phenotype_col', 'phenotype_to_marker',
                     'pooled_col', 'minimum_samples'],
        'mapping_stats': ['number_mapped_col', 'min_reads'],
        'expression_feature': ['rename_col', 'ignore_subset_cols'],
        'splicing_feature': ['rename_col', 'ignore_subset_cols',
                             'expression_id_col'],
        'gene_ontology': []}
    resource_names = keys_from_study.keys()

    # Add auto-generated attributes into the true datapackage
    for name, keys in keys_from_study.iteritems():
        resource = name_to_resource(test_datapackage, name)
        for key in keys:
            command = self.get_data_eval_command(name, key)
            test_value = resource[key]
            true_value = eval(command)
            if isinstance(test_value, dict):
                pdt.assert_dict_equal(test_value, true_value)
            elif isinstance(test_value, Iterable):
                pdt.assert_array_equal(test_value, true_value)

    for name in resource_names:
        resource = name_to_resource(test_datapackage, name)
        path = '{}.csv.gz'.format(name)
        assert resource['path'] == path
        test_df = pd.read_csv('{}/{}/{}'.format(tmpdir, study_name, path),
                              index_col=0, compression='gzip')
        command = self.get_data_eval_command(name, 'data_original')
        true_df = eval(command)

        pdt.assert_frame_equal(test_df, true_df)

    version = semantic_version.Version(study.version)
    version.patch += 1
    assert str(version) == test_datapackage['datapackage_version']
    assert study_name == test_datapackage['name']

def test_valid(self):
    ts = self.ts.copy()
    ts[::2] = np.NaN

    result = ts.valid()
    self.assertEqual(len(result), ts.count())

    tm.assert_dict_equal(result, ts, compare_keys=False)

def test_tfs_write_read(_dataframe: TfsDataFrame, _test_file: str):
    write_tfs(_test_file, _dataframe)
    assert pathlib.Path(_test_file).is_file()

    new = read_tfs(_test_file)
    assert_frame_equal(_dataframe, new, check_exact=False)  # float precision can be an issue
    assert_dict_equal(_dataframe.headers, new.headers, compare_keys=True)

def test__create_dict(self, all_transcripts_of_exon,
                      strand_true_exc_nmd, nmd_exons, true_dict):
    test = nmd_exons._get_exons_from_transcripts(all_transcripts_of_exon,
                                                 strand_true_exc_nmd)
    test = dict((key, [v.id for v in values])
                for key, values in test.items())
    true = true_dict
    pdt.assert_dict_equal(test, true)

def testit(index):
    pickled = pickle.dumps(index)
    unpickled = pickle.loads(pickled)

    self.assert_(isinstance(unpickled, Index))
    self.assert_(np.array_equal(unpickled, index))
    tm.assert_dict_equal(unpickled.indexMap, index.indexMap)

def testit(index):
    pickled = pickle.dumps(index)
    unpickled = pickle.loads(pickled)

    self.assert_(isinstance(unpickled, Index))
    self.assert_(np.array_equal(unpickled, index))
    common.assert_dict_equal(unpickled.indexMap, index.indexMap)

def test_combineFrame(self):
    frame_copy = self.frame.reindex(self.frame.index[::2])

    del frame_copy['D']
    frame_copy['C'][:5] = nan

    added = self.frame + frame_copy
    tm.assert_dict_equal(added['A'].valid(),
                         self.frame['A'] * 2,
                         compare_keys=False)

    self.assertTrue(
        np.isnan(added['C'].reindex(frame_copy.index)[:5]).all())

    # assert(False)

    self.assertTrue(np.isnan(added['D']).all())

    self_added = self.frame + self.frame
    self.assertTrue(self_added.index.equals(self.frame.index))

    added_rev = frame_copy + self.frame
    self.assertTrue(np.isnan(added['D']).all())
    self.assertTrue(np.isnan(added_rev['D']).all())

    # corner cases

    # empty
    plus_empty = self.frame + self.empty
    self.assertTrue(np.isnan(plus_empty.values).all())

    empty_plus = self.empty + self.frame
    self.assertTrue(np.isnan(empty_plus.values).all())

    empty_empty = self.empty + self.empty
    self.assertTrue(empty_empty.empty)

    # out of order
    reverse = self.frame.reindex(columns=self.frame.columns[::-1])

    assert_frame_equal(reverse + self.frame, self.frame * 2)

    # mix vs float64, upcast
    added = self.frame + self.mixed_float
    _check_mixed_float(added, dtype='float64')
    added = self.mixed_float + self.frame
    _check_mixed_float(added, dtype='float64')

    # mix vs mix
    added = self.mixed_float + self.mixed_float2
    _check_mixed_float(added, dtype=dict(C=None))
    added = self.mixed_float2 + self.mixed_float
    _check_mixed_float(added, dtype=dict(C=None))

    # with int
    added = self.frame + self.mixed_int
    _check_mixed_float(added, dtype='float64')

def test_get_level_lengths_un_sorted(self):
    index = pd.MultiIndex.from_arrays([[1, 1, 2, 1],
                                       ['a', 'b', 'b', 'd']])

    expected = {(0, 0): 2, (0, 2): 1, (0, 3): 1,
                (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1}
    result = _get_level_lengths(index)
    tm.assert_dict_equal(result, expected)

def test_tfs_write_read_autoindex(_dataframe: TfsDataFrame, _test_file: str):
    df = _dataframe.set_index("a")
    df1 = _dataframe.set_index("a")
    write_tfs(_test_file, df, save_index=True)
    assert_frame_equal(df, df1)  # writing should not have modified df

    df_read = read_tfs(_test_file)
    assert_index_equal(df.index, df_read.index, check_exact=False)
    assert_dict_equal(_dataframe.headers, df_read.headers,
                      compare_keys=True)

def test_groupby(idx):
    groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    labels = idx.get_values().tolist()
    exp = {1: labels[:3], 2: labels[3:]}
    tm.assert_dict_equal(groups, exp)

    # GH5620
    groups = idx.groupby(idx)
    exp = {key: [key] for key in idx}
    tm.assert_dict_equal(groups, exp)

def test__get_junction_reads(bamfile, uniquely, multi):
    from outrigger.io.bam import _get_junction_reads

    test_uniquely, test_multi = _get_junction_reads(bamfile)
    true_uniquely = uniquely
    true_multi = multi
    pdt.assert_dict_equal(test_uniquely, true_uniquely)
    pdt.assert_dict_equal(test_multi, true_multi)

def test_to_dict(self, mapping):
    test_data = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }

    # GH16122
    recons_data = DataFrame(test_data).to_dict(into=mapping)
    for k, v in test_data.items():
        for k2, v2 in v.items():
            assert v2 == recons_data[k][k2]

    recons_data = DataFrame(test_data).to_dict("l", mapping)
    for k, v in test_data.items():
        for k2, v2 in v.items():
            assert v2 == recons_data[k][int(k2) - 1]

    recons_data = DataFrame(test_data).to_dict("s", mapping)
    for k, v in test_data.items():
        for k2, v2 in v.items():
            assert v2 == recons_data[k][k2]

    recons_data = DataFrame(test_data).to_dict("sp", mapping)
    expected_split = {'columns': ['A', 'B'],
                      'index': ['1', '2', '3'],
                      'data': [[1.0, '1'], [2.0, '2'], [np.nan, '3']]}
    tm.assert_dict_equal(recons_data, expected_split)

    recons_data = DataFrame(test_data).to_dict("r", mapping)
    expected_records = [{'A': 1.0, 'B': '1'},
                        {'A': 2.0, 'B': '2'},
                        {'A': np.nan, 'B': '3'}]
    assert isinstance(recons_data, list)
    assert len(recons_data) == 3
    for l, r in zip(recons_data, expected_records):
        tm.assert_dict_equal(l, r)

    # GH10844
    recons_data = DataFrame(test_data).to_dict("i")
    for k, v in test_data.items():
        for k2, v2 in v.items():
            assert v2 == recons_data[k2][k]

    df = DataFrame(test_data)
    df['duped'] = df[df.columns[0]]
    recons_data = df.to_dict("i")
    comp_data = test_data.copy()
    comp_data['duped'] = comp_data[df.columns[0]]
    for k, v in comp_data.items():
        for k2, v2 in v.items():
            assert v2 == recons_data[k2][k]

def test_establish_reducer_use_existing(self):
    from cupcake.smush.base import SmushPlotterBase

    pca_kws = {}
    n_components = 2
    reducer = PCA(n_components=n_components, **pca_kws)

    p = SmushPlotterBase()
    p.establish_reducer(reducer)

    assert isinstance(p.reducer, type(reducer))
    pdt.assert_dict_equal(p.reducer.get_params(), reducer.get_params())

def test_na_values_dict_aliasing(self):
    na_values = {'a': 2, 'b': 1}
    na_values_copy = na_values.copy()

    names = ['a', 'b']
    data = '1,2\n2,1'

    expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
    out = self.read_csv(StringIO(data), names=names, na_values=na_values)

    tm.assert_frame_equal(out, expected)
    tm.assert_dict_equal(na_values, na_values_copy)

def test_na_values_dict_aliasing(all_parsers):
    parser = all_parsers
    na_values = {"a": 2, "b": 1}
    na_values_copy = na_values.copy()

    names = ["a", "b"]
    data = "1,2\n2,1"

    expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
    result = parser.read_csv(StringIO(data), names=names,
                             na_values=na_values)

    tm.assert_frame_equal(result, expected)
    tm.assert_dict_equal(na_values, na_values_copy)

def test_observed_groups_with_nan(observed):
    # GH 24740
    df = pd.DataFrame({'cat': pd.Categorical(['a', np.nan, 'a'],
                                             categories=['a', 'b', 'd']),
                       'vals': [1, 2, 3]})
    g = df.groupby('cat', observed=observed)
    result = g.groups
    if observed:
        expected = {'a': Index([0, 2], dtype='int64')}
    else:
        expected = {'a': Index([0, 2], dtype='int64'),
                    'b': Index([], dtype='int64'),
                    'd': Index([], dtype='int64')}
    tm.assert_dict_equal(result, expected)

def test__report_read_positions(bamfile):
    from outrigger.io.bam import _report_read_positions

    bam = pysam.AlignmentFile(bamfile, 'rb')
    test = collections.Counter()
    for read in bam:
        _report_read_positions(read, test)
        break
    bam.close()

    true = {('chr2', 136713559, 136713559, '+'): 1}
    pdt.assert_dict_equal(test, true)

def test_list_grouper_with_nat(self):
    # GH 14715
    df = pd.DataFrame({'date': pd.date_range('1/1/2011',
                                             periods=365, freq='D')})
    df.iloc[-1] = pd.NaT
    grouper = pd.Grouper(key='date', freq='AS')

    # Grouper in a list grouping
    result = df.groupby([grouper])
    expected = {pd.Timestamp('2011-01-01'): pd.Index(list(range(364)))}
    tm.assert_dict_equal(result.groups, expected)

    # Test case without a list
    result = df.groupby(grouper)
    expected = {pd.Timestamp('2011-01-01'): 365}
    tm.assert_dict_equal(result.groups, expected)