def test__init__featuredata(self, expression_data_no_na,
                            expression_feature_data,
                            expression_feature_rename_col):
    """Check ``BaseData`` construction when feature data is supplied.

    Builds a ``BaseData`` from NA-free expression data plus feature
    metadata, then asserts that:

    * ``data_original`` and ``data`` both equal the input data frame,
    * ``feature_data`` equals the supplied feature metadata,
    * ``feature_renamer_series`` equals the expected renamer built below,
    * ``feature_subsets`` equals the output of ``subsets_from_metadata``
      with a ``'variant'`` entry added.

    Parameters
    ----------
    expression_data_no_na
        Expression data passed as the first argument to ``BaseData``.
    expression_feature_data
        Feature metadata passed as ``feature_data``.
    expression_feature_rename_col
        Column of the feature metadata used for renaming, or ``None``.
    """
    from flotilla.data_model.base import (BaseData, subsets_from_metadata,
                                          MINIMUM_FEATURE_SUBSET)

    base_data = BaseData(expression_data_no_na,
                         feature_data=expression_feature_data,
                         feature_rename_col=expression_feature_rename_col)

    if expression_feature_rename_col is None:
        # Without a rename column the expected renamer maps every feature
        # id onto itself.
        expected_renamer = pd.Series(expression_feature_data.index,
                                     index=expression_feature_data.index)
    else:
        expected_renamer = expression_feature_data[
            expression_feature_rename_col]

    expected_subsets = subsets_from_metadata(expression_feature_data,
                                             MINIMUM_FEATURE_SUBSET,
                                             'features')
    # The 'variant' subset is taken from the object under test, so only
    # its presence under that key is being checked here.
    expected_subsets['variant'] = base_data.variant

    pdt.assert_frame_equal(base_data.data_original, expression_data_no_na)
    pdt.assert_frame_equal(base_data.feature_data, expression_feature_data)
    pdt.assert_frame_equal(base_data.data, expression_data_no_na)
    pdt.assert_series_equal(base_data.feature_renamer_series,
                            expected_renamer)
    pdt.assert_dict_equal(base_data.feature_subsets, expected_subsets)
def test_subsets_from_metadata(expression_feature_data):
    """Compare ``subsets_from_metadata`` with a subset dict rebuilt here.

    The expected dictionary is assembled from a copy of the feature
    metadata in three steps:

    1. one entry per column: boolean columns map the column name to the
       index of rows whose value is true; other columns contribute one
       ``'<column>: <group>'`` entry per group with at least ``minimum``
       rows;
    2. a ``'not (<name>)'`` complement for every entry whose complement
       name contains neither ``'True'`` nor ``'False'``;
    3. an ``'all <subset_type>'`` entry holding the whole index.

    Parameters
    ----------
    expression_feature_data
        Feature metadata whose columns are turned into subsets.
    """
    from flotilla.data_model.base import subsets_from_metadata

    minimum = 5
    subset_type = 'expression'
    bool_values = (False, True)
    metadata = expression_feature_data.copy()

    def is_boolean(column):
        """True when the column's non-null values are False and True."""
        observed = tuple(sorted(metadata[column].dropna().unique()))
        return observed == bool_values

    test_subsets = subsets_from_metadata(metadata, minimum, subset_type)

    true_subsets = {}
    for col in expression_feature_data:
        if is_boolean(col):
            flags = metadata[col].dropna()
            # Boolean indexing keeps only the rows flagged True.
            true_subsets[col] = flags[flags].index
            continue

        grouped = metadata.groupby(col)
        group_sizes = grouped.size()
        for group in group_sizes[group_sizes >= minimum].keys():
            if not isinstance(group, bool):
                label = '{}: {}'.format(col, group)
                true_subsets[label] = grouped.groups[group]

    # Walk a snapshot of the names because complements are added to the
    # same dictionary inside the loop.
    for subset_name in list(true_subsets):
        complement = 'not ({})'.format(subset_name)
        mentions_bool = 'False' in complement or 'True' in complement
        if mentions_bool or complement in true_subsets:
            continue
        members = metadata.index.isin(true_subsets[subset_name])
        true_subsets[complement] = metadata.index[~members]

    true_subsets['all {}'.format(subset_type)] = metadata.index

    pdt.assert_dict_equal(true_subsets, test_subsets)

    # Every boolean column must appear under its own name in the result.
    for col in expression_feature_data:
        if is_boolean(col):
            assert col in test_subsets
def test_subsets_from_metadata(expression_feature_data):
    """Check ``subsets_from_metadata`` against an expected dict built here.

    Expected subsets are derived from a copy of the feature metadata:

    * a boolean column (non-null values are exactly False and True) maps
      its own name to the index of the rows that are true;
    * any other column yields a ``'<column>: <group>'`` entry for each
      group containing at least ``minimum`` rows;
    * each entry gets a ``'not (<name>)'`` complement unless that name
      contains ``'True'`` or ``'False'`` or already exists;
    * ``'all <subset_type>'`` maps to the complete index.

    Parameters
    ----------
    expression_feature_data
        Feature metadata whose columns are turned into subsets.
    """
    from flotilla.data_model.base import subsets_from_metadata

    minimum = 5
    subset_type = 'expression'
    metadata = expression_feature_data.copy()
    test_subsets = subsets_from_metadata(metadata, minimum, subset_type)

    true_subsets = {}
    sorted_bool = (False, True)
    for col in expression_feature_data:
        if tuple(sorted(metadata[col].dropna().unique())) == sorted_bool:
            series = metadata[col].dropna()
            # Boolean indexing keeps only the rows flagged True.
            true_subsets[col] = series[series].index
        else:
            grouped = metadata.groupby(col)
            sizes = grouped.size()
            filtered_sizes = sizes[sizes >= minimum]
            for group in filtered_sizes.keys():
                if isinstance(group, bool):
                    continue
                name = '{}: {}'.format(col, group)
                true_subsets[name] = grouped.groups[group]

    # Iterate over a snapshot of the keys: the loop body inserts the
    # complement entries into the same dict, and iterating the live key
    # view while inserting raises RuntimeError on Python 3.
    for sample_subset in list(true_subsets):
        name = 'not ({})'.format(sample_subset)
        if 'False' in name or 'True' in name:
            continue
        if name not in true_subsets:
            in_features = metadata.index.isin(true_subsets[sample_subset])
            true_subsets[name] = metadata.index[~in_features]

    true_subsets['all {}'.format(subset_type)] = metadata.index

    pdt.assert_dict_equal(true_subsets, test_subsets)

    # Make sure every boolean column is in the subsets under its own name
    for col in expression_feature_data:
        if tuple(sorted(metadata[col].dropna().unique())) == sorted_bool:
            assert col in test_subsets
def test_init(self, phenotype_order, phenotype_to_color,
              phenotype_to_marker):
    """Check the attributes ``MetaData`` derives when it is constructed.

    A ``MetaData`` is built from ``self.metadata`` and ``self.kws`` plus
    the three arguments, and every expected value is recomputed here for
    comparison.

    Parameters
    ----------
    phenotype_order
        Explicit phenotype ordering, or ``None`` to expect the sorted
        unique phenotypes.
    phenotype_to_color
        Mapping of phenotype to color (a key of ``str_to_color`` or a
        color used as-is), or ``None`` to expect the object's default
        colors.
    phenotype_to_marker
        Mapping of phenotype to marker, or ``None`` to expect markers
        handed out cyclically in phenotype order.
    """
    from flotilla.data_model.metadata import MetaData
    from flotilla.data_model.base import subsets_from_metadata
    from flotilla.visualize.color import str_to_color

    test_metadata = MetaData(self.metadata,
                             phenotype_order=phenotype_order,
                             phenotype_to_color=phenotype_to_color,
                             phenotype_to_marker=phenotype_to_marker,
                             **self.kws)

    if phenotype_order is None:
        true_phenotype_order = list(sorted(test_metadata.unique_phenotypes))
    else:
        true_phenotype_order = phenotype_order

    if phenotype_to_color is None:
        default_phenotype_to_color = \
            test_metadata._default_phenotype_to_color
        true_phenotype_to_color = dict(
            (k, default_phenotype_to_color[k])
            for k in true_phenotype_order)
    else:
        true_phenotype_to_color = {}
        # ``items()`` instead of the Python-2-only ``iteritems()``.
        for phenotype, color in phenotype_to_color.items():
            try:
                color = str_to_color[color]
            except KeyError:
                # Not a known color name: keep the value as given.
                pass
            true_phenotype_to_color[phenotype] = color

    if phenotype_to_marker is None:
        markers = cycle(['o', '^', 's', 'v', '*', 'D', ])

        def marker_factory():
            # Built-in ``next`` works on Python 2.6+ and Python 3, unlike
            # the iterator's ``.next()`` method.
            return next(markers)

        true_phenotype_to_marker = defaultdict(marker_factory)
        for x in true_phenotype_order:
            # Looking a key up is what makes the defaultdict assign it the
            # next marker, so markers follow phenotype order.
            true_phenotype_to_marker[x]
    else:
        true_phenotype_to_marker = phenotype_to_marker

    # Materialise ``zip``/``map`` so the values are real lists on Python 3.
    true_phenotype_transitions = list(zip(true_phenotype_order[:-1],
                                          true_phenotype_order[1:]))
    true_unique_phenotypes = self.metadata[self.phenotype_col].unique()
    true_n_phenotypes = len(true_unique_phenotypes)
    true_colors = list(map(mpl.colors.rgb2hex,
                           sns.color_palette('husl',
                                             n_colors=true_n_phenotypes)))
    # Separate iterator, so handing out defaults leaves ``true_colors``
    # intact for the comparison below.
    colors = iter(true_colors)
    true_default_phenotype_to_color = defaultdict(lambda: next(colors))
    true_sample_id_to_phenotype = self.metadata[self.phenotype_col]
    true_phenotype_color_order = [true_phenotype_to_color[p]
                                  for p in true_phenotype_order]
    true_sample_id_to_color = dict(
        (i, true_phenotype_to_color[true_sample_id_to_phenotype[i]])
        for i in self.metadata.index)
    true_sample_subsets = subsets_from_metadata(
        self.metadata, self.kws['minimum_sample_subset'], 'samples')

    pdt.assert_frame_equal(test_metadata.data, self.metadata)
    pdt.assert_series_equal(test_metadata.sample_id_to_phenotype,
                            true_sample_id_to_phenotype)
    pdt.assert_numpy_array_equal(test_metadata.unique_phenotypes,
                                 true_unique_phenotypes)
    pdt.assert_numpy_array_equal(test_metadata.n_phenotypes,
                                 true_n_phenotypes)
    pdt.assert_numpy_array_equal(test_metadata._default_phenotype_order,
                                 list(sorted(true_unique_phenotypes)))
    pdt.assert_numpy_array_equal(test_metadata.phenotype_order,
                                 true_phenotype_order)
    pdt.assert_numpy_array_equal(test_metadata.phenotype_transitions,
                                 true_phenotype_transitions)
    pdt.assert_numpy_array_equal(test_metadata._colors, true_colors)
    pdt.assert_numpy_array_equal(test_metadata._default_phenotype_to_color,
                                 true_default_phenotype_to_color)
    pdt.assert_dict_equal(test_metadata.phenotype_to_color,
                          true_phenotype_to_color)
    pdt.assert_dict_equal(test_metadata.phenotype_to_marker,
                          true_phenotype_to_marker)
    pdt.assert_numpy_array_equal(test_metadata.phenotype_color_order,
                                 true_phenotype_color_order)
    pdt.assert_dict_equal(test_metadata.sample_id_to_color,
                          true_sample_id_to_color)
    pdt.assert_dict_equal(test_metadata.sample_subsets,
                          true_sample_subsets)
def test_init(self, phenotype_order, phenotype_to_color,
              phenotype_to_marker):
    """Verify what ``MetaData`` computes from its constructor arguments.

    The object under test is created from ``self.metadata``, the three
    arguments and ``self.kws``; each expected attribute is then rebuilt
    independently and compared.

    Parameters
    ----------
    phenotype_order
        Phenotype ordering to expect, or ``None`` for the sorted unique
        phenotypes.
    phenotype_to_color
        Phenotype-to-color mapping whose values are translated through
        ``str_to_color`` when present there, or ``None`` for the object's
        default colors.
    phenotype_to_marker
        Phenotype-to-marker mapping, or ``None`` for markers assigned
        cyclically in phenotype order.
    """
    from flotilla.data_model.metadata import MetaData
    from flotilla.data_model.base import subsets_from_metadata
    from flotilla.visualize.color import str_to_color

    test_metadata = MetaData(self.metadata,
                             phenotype_order=phenotype_order,
                             phenotype_to_color=phenotype_to_color,
                             phenotype_to_marker=phenotype_to_marker,
                             **self.kws)

    if phenotype_order is None:
        true_phenotype_order = list(sorted(test_metadata.unique_phenotypes))
    else:
        true_phenotype_order = phenotype_order

    if phenotype_to_color is None:
        default_phenotype_to_color = \
            test_metadata._default_phenotype_to_color
        true_phenotype_to_color = dict(
            (k, default_phenotype_to_color[k])
            for k in true_phenotype_order)
    else:
        true_phenotype_to_color = {}
        # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
        # Python 2 only.
        for phenotype, color in phenotype_to_color.items():
            try:
                color = str_to_color[color]
            except KeyError:
                # Unknown name: the supplied value is used unchanged.
                pass
            true_phenotype_to_color[phenotype] = color

    if phenotype_to_marker is None:
        markers = cycle(['o', '^', 's', 'v', '*', 'D', ])

        def marker_factory():
            # ``next(it)`` rather than ``it.next()`` so this also runs on
            # Python 3.
            return next(markers)

        true_phenotype_to_marker = defaultdict(marker_factory)
        for x in true_phenotype_order:
            # The lookup itself populates the defaultdict, giving each
            # phenotype the next marker in order.
            true_phenotype_to_marker[x]
    else:
        true_phenotype_to_marker = phenotype_to_marker

    # ``zip`` and ``map`` are lazy on Python 3; build lists so the
    # expected values can be compared (and ``true_colors`` reused).
    true_phenotype_transitions = list(zip(true_phenotype_order[:-1],
                                          true_phenotype_order[1:]))
    true_unique_phenotypes = self.metadata[self.phenotype_col].unique()
    true_n_phenotypes = len(true_unique_phenotypes)
    true_colors = list(map(mpl.colors.rgb2hex,
                           sns.color_palette('husl',
                                             n_colors=true_n_phenotypes)))
    colors = iter(true_colors)
    true_default_phenotype_to_color = defaultdict(lambda: next(colors))
    true_sample_id_to_phenotype = self.metadata[self.phenotype_col]
    true_phenotype_color_order = [true_phenotype_to_color[p]
                                  for p in true_phenotype_order]
    true_sample_id_to_color = dict(
        (i, true_phenotype_to_color[true_sample_id_to_phenotype[i]])
        for i in self.metadata.index)
    true_sample_subsets = subsets_from_metadata(
        self.metadata, self.kws['minimum_sample_subset'], 'samples')

    pdt.assert_frame_equal(test_metadata.data, self.metadata)
    pdt.assert_series_equal(test_metadata.sample_id_to_phenotype,
                            true_sample_id_to_phenotype)
    # NOTE(review): ``assert_numpy_array_equal`` replaces
    # ``assert_array_equal`` here, matching the other ``test_init`` in
    # this file. ``assert_array_equal`` is assumed not to be a pandas
    # helper (it looks like a numpy re-export) - confirm against the
    # pinned pandas version.
    pdt.assert_numpy_array_equal(test_metadata.unique_phenotypes,
                                 true_unique_phenotypes)
    pdt.assert_numpy_array_equal(test_metadata.n_phenotypes,
                                 true_n_phenotypes)
    pdt.assert_numpy_array_equal(test_metadata._default_phenotype_order,
                                 list(sorted(true_unique_phenotypes)))
    pdt.assert_numpy_array_equal(test_metadata.phenotype_order,
                                 true_phenotype_order)
    pdt.assert_numpy_array_equal(test_metadata.phenotype_transitions,
                                 true_phenotype_transitions)
    pdt.assert_numpy_array_equal(test_metadata._colors, true_colors)
    pdt.assert_numpy_array_equal(test_metadata._default_phenotype_to_color,
                                 true_default_phenotype_to_color)
    pdt.assert_dict_equal(test_metadata.phenotype_to_color,
                          true_phenotype_to_color)
    pdt.assert_dict_equal(test_metadata.phenotype_to_marker,
                          true_phenotype_to_marker)
    pdt.assert_numpy_array_equal(test_metadata.phenotype_color_order,
                                 true_phenotype_color_order)
    pdt.assert_dict_equal(test_metadata.sample_id_to_color,
                          true_sample_id_to_color)
    pdt.assert_dict_equal(test_metadata.sample_subsets,
                          true_sample_subsets)