def _prepare_plot(self, df, values):
    """
    Convert the data into a pivot table for plotting.

    The ``values`` column provides the cell values; the pivoting itself is
    delegated to ``pivot_table``, called with ``self.label_callback``.

    Arguments:
        df: DataFrame that must contain a ``values`` column
        values: name of the column holding the data to plot

    Returns:
        (pivot table of ``values``, groups) where groups is the from-groups
        mapping returned by ``pivot_table``, extended with every to-group
        whose key is not already a from-group

    Raises:
        ValueError: if ``values`` is not a column of ``df``
    """
    # NOTE: this used to be a string followed by ``.format(...)``, which made
    # it a throw-away expression evaluated on every call rather than a
    # docstring (``__doc__`` was None).

    # TODO: should this work on samples without mean?
    if values not in df.columns:
        raise ValueError("dataframe must have {} column".format(values))

    pvt, fgroups, tgroups = pivot_table(
        df, values=values, label_callback=self.label_callback)

    # we want to have groups from-first, but without overwriting
    # any from-groups with to-groups
    groups = fgroups
    for key, value in tgroups.items():
        if key not in groups:
            groups[key] = value
    return pvt, groups
def plot(self, dataframe, values=None):
    """
    Plot a heatplot for dataframe.

    Arguments:
        dataframe: DataFrame passed to ``pivot_table`` to build the table
            that is drawn
        values: optional; when not None it is forwarded to ``pivot_table``
            as ``values``, otherwise ``pivot_table`` uses its own default

    Returns:
        the figure created with ``plt.figure`` on which the heatmap is drawn
    """
    kw = {'values': values} if values is not None else {}
    # only the pivot table is needed here; the two group mappings are unused
    pvt, _, _ = pivot_table(
        dataframe, label_callback=self.label_callback, **kw)

    # figsize is (width, height) whereas DataFrame.shape is (rows, columns).
    # Columns are laid out along the x axis, so the width has to scale with
    # the number of columns and the height with the number of rows.
    n_rows, n_cols = pvt.shape
    f = plt.figure(figsize=(n_cols * 0.3, n_rows * 0.3))
    heatmap(pvt, cmap=self.cmap, cbar=self.cbar,
            xticklabels=True, yticklabels=True)
    return f
def test_columns_mismatch(self):
    """the 'pre' and 'post' columns may hold different sets of values"""
    data = OrderedDict()
    data[MEAN] = [1, 2]
    data['pre'] = ['a', 'b']
    data['post'] = ['c', 'c']
    df = pd.DataFrame(data)

    # reference result straight from pandas, computed before our own call
    expected = df.pivot_table(index='pre', columns='post', values=MEAN)
    actual = pivot_table(df, 'pre', 'post', MEAN)[0]

    pd.testing.assert_frame_equal(expected, actual)
def test_basic(self):
    """check the table returned for a simple, single-level dataframe"""
    data = OrderedDict()
    data[MEAN] = [1, 2, 1, 2]
    data['pre'] = ['a', 'a', 'b', 'b']
    data['post'] = ['a', 'b', 'a', 'b']
    df = pd.DataFrame(data)

    result = pivot_table(df, 'pre', 'post', MEAN)[0]
    # pandas' own pivot of the same frame is the reference
    reference = df.pivot_table(index='pre', columns='post', values=MEAN)

    pd.testing.assert_frame_equal(result, reference)
def test_duplicate_pathways(self):
    """
    duplicated pathways are expected to be averaged,
    which is what pandas does by default
    """
    data = OrderedDict()
    data[(MEAN, '')] = [1, 2, 1, 2, 3]
    data[(STD, '')] = [1, 2, 3, 1, 3]
    data[('pre', 'mtype')] = ['a', 'a', 'b', 'b', 'b']
    data[('post', 'mtype')] = ['a', 'b', 'a', 'b', 'b']
    df = pd.DataFrame(data)

    # the two b -> b rows (2 and 3) are expected to collapse to their mean
    flat = pd.DataFrame({
        'pre: mtype': ['a', 'a', 'b', 'b'],
        'post: mtype': ['a', 'b', 'a', 'b'],
        MEAN: [1, 2, 1, 2.5]})
    expected = flat.pivot_table(
        index='pre: mtype', columns='post: mtype', values=MEAN)

    actual = pivot_table(df, 'pre', 'post', MEAN)[0]
    pd.testing.assert_frame_equal(expected, actual)
def test_2_level_columns(self):
    """
    multilevel dataframes: the 'index' and 'columns' columns
    are expected to be collapsed as necessary
    """
    data = OrderedDict()
    data[(MEAN, '')] = [1, 2, 1, 2]
    data[(STD, '')] = [1, 2, 3, 1]
    data[('pre', 'mtype')] = ['a', 'a', 'b', 'b']
    data[('post', 'mtype')] = ['a', 'b', 'a', 'b']
    df = pd.DataFrame(data)

    table = pivot_table(df, 'pre', 'post', MEAN)[0]

    # expected table, built from an already flattened frame
    flat = pd.DataFrame({
        'pre: mtype': ['a', 'a', 'b', 'b'],
        'post: mtype': ['a', 'b', 'a', 'b'],
        MEAN: [1, 2, 1, 2]})
    edf = flat.pivot_table(
        index='pre: mtype', columns='post: mtype', values=MEAN)

    print(table, "\n\n", edf)
    pd.testing.assert_frame_equal(table, edf)