def test_save(self, study, tmpdir):
    from flotilla.datapackage import name_to_resource

    study_name = 'test_save'
    study.supplemental.expression_corr = study.expression.data.corr()
    study.save(study_name, flotilla_dir=tmpdir)

    assert len(tmpdir.listdir()) == 1
    save_dir = tmpdir.listdir()[0]

    with open('{}/datapackage.json'.format(save_dir)) as f:
        test_datapackage = json.load(f)
    assert study_name == save_dir.purebasename

    # resource_keys_to_ignore = ('compression', 'format', 'path',
    #                            'url')
    keys_from_study = {
        'splicing': [],
        'expression': ['thresh', 'log_base', 'plus_one'],
        'metadata': ['phenotype_order', 'phenotype_to_color',
                     'phenotype_col', 'phenotype_to_marker',
                     'pooled_col', 'minimum_samples'],
        'mapping_stats': ['number_mapped_col', 'min_reads'],
        'expression_feature': ['rename_col', 'ignore_subset_cols'],
        'splicing_feature': ['rename_col', 'ignore_subset_cols',
                             'expression_id_col'],
        'gene_ontology': []}
    resource_names = keys_from_study.keys()

    # Check that the auto-generated attributes written to the datapackage
    # match the values on the study object itself
    for name, keys in keys_from_study.iteritems():
        resource = name_to_resource(test_datapackage, name)
        for key in keys:
            command = self.get_data_eval_command(name, key)
            test_value = resource[key]
            true_value = eval(command)
            if isinstance(test_value, dict):
                pdt.assert_dict_equal(test_value, true_value)
            elif isinstance(test_value, Iterable):
                pdt.assert_array_equal(test_value, true_value)

    # Each resource should be written as a gzipped csv whose contents
    # round-trip to the original data
    for name in resource_names:
        resource = name_to_resource(test_datapackage, name)
        path = '{}.csv.gz'.format(name)
        assert resource['path'] == path
        test_df = pd.read_csv('{}/{}/{}'.format(tmpdir, study_name, path),
                              index_col=0, compression='gzip')
        command = self.get_data_eval_command(name, 'data_original')
        true_df = eval(command)
        pdt.assert_frame_equal(test_df, true_df)

    # Saving bumps the patch version
    version = semantic_version.Version(study.version)
    version.patch += 1
    assert str(version) == test_datapackage['datapackage_version']
    assert study_name == test_datapackage['name']
def test_save_supplemental(self, study, tmpdir):
    from flotilla.datapackage import name_to_resource

    study_name = 'test_save_supplemental'
    study.supplemental.expression_corr = study.expression.data.corr()
    study.save(study_name, flotilla_dir=tmpdir)

    assert len(tmpdir.listdir()) == 1
    save_dir = tmpdir.listdir()[0]

    with open('{}/datapackage.json'.format(save_dir)) as f:
        test_datapackage = json.load(f)

    supplemental = name_to_resource(test_datapackage, 'supplemental')
    for resource in supplemental['resources']:
        name = resource['name']
        path = '{}.csv.gz'.format(name)
        assert resource['path'] == path
        full_path = '{}/{}/{}'.format(tmpdir, study_name, path)
        test_df = pd.read_csv(full_path, index_col=0, compression='gzip')
        command = self.get_data_eval_command('supplemental', name)
        true_df = eval(command)
        pdt.assert_frame_equal(test_df, true_df)

    version = semantic_version.Version(study.version)
    version.patch += 1
    assert str(version) == test_datapackage['datapackage_version']
    assert study_name == test_datapackage['name']
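# Both tests above build an eval string via self.get_data_eval_command,
# which maps a resource name and attribute to the study attribute that
# holds the true value. The helper is defined elsewhere in this class;
# the sketch below is a hypothetical reconstruction inferred from the
# inline fallback in test_save further down, not the canonical
# implementation.
def _get_data_eval_command_sketch(name, key):
    if 'feature' in name:
        # Feature metadata lives on the parent data type, e.g.
        # 'expression_feature' -> 'study.expression.feature_<key>'
        return 'study.{}.feature_{}'.format(name[:-len('_feature')], key)
    return 'study.{}.{}'.format(name, key)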
@pytest.fixture
def datapackage(self, shalek2013_datapackage, metadata_none_key,
                expression_none_key, splicing_none_key, monkeypatch):
    datapackage = copy.deepcopy(shalek2013_datapackage)
    datatype_to_key = {'metadata': metadata_none_key,
                       'expression': expression_none_key,
                       'splicing': splicing_none_key}
    # Delete the requested key from each resource so the datapackage
    # exercises the "key missing" code paths
    for datatype, key in datatype_to_key.iteritems():
        if key is not None:
            resource = name_to_resource(datapackage, datatype)
            if key in resource:
                monkeypatch.delitem(resource, key, raising=False)
    return datapackage
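# The metadata_none_key, expression_none_key and splicing_none_key fixtures
# consumed above are assumed to be pytest fixtures parametrized over optional
# resource keys, with None meaning "leave the resource untouched". A
# hypothetical sketch of one of them; the name and parameter list are
# illustrative, not the real definitions:
@pytest.fixture(params=[None, 'pooled_col', 'phenotype_col'])
def _example_none_key(request):
    return request.param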
def test_save(self, shalek2013_datapackage_path, shalek2013_datapackage,
              tmpdir, monkeypatch):
    import flotilla
    from flotilla.datapackage import name_to_resource

    study = flotilla.embark(shalek2013_datapackage_path,
                            load_species_data=False)
    study_name = 'test_save'
    study.save(study_name, flotilla_dir=tmpdir)

    assert len(tmpdir.listdir()) == 1
    save_dir = tmpdir.listdir()[0]

    with open('{}/datapackage.json'.format(save_dir)) as f:
        test_datapackage = json.load(f)
    true_datapackage = copy.deepcopy(shalek2013_datapackage)

    assert study_name == save_dir.purebasename

    resource_keys_to_ignore = ('compression', 'format', 'path', 'url')
    keys_from_study = {'splicing': [],
                       'expression': ['thresh', 'log_base', 'plus_one'],
                       'metadata': ['phenotype_order', 'phenotype_to_color',
                                    'phenotype_col', 'phenotype_to_marker',
                                    'pooled_col', 'minimum_samples'],
                       'mapping_stats': ['number_mapped_col'],
                       'expression_feature': ['rename_col',
                                              'ignore_subset_cols'],
                       'splicing_feature': ['rename_col',
                                            'ignore_subset_cols',
                                            'expression_id_col']}
    resource_names = keys_from_study.keys()

    # Add auto-generated attributes into the true datapackage
    for name, keys in keys_from_study.iteritems():
        resource = name_to_resource(true_datapackage, name)
        for key in keys:
            if 'feature' in name:
                # Strip the '_feature' suffix to get the parent data type.
                # (The original used name.rstrip('_feature'), which strips
                # characters rather than a suffix and only worked here by
                # coincidence.)
                command = 'study.{}.feature_{}'.format(
                    name[:-len('_feature')], key)
            else:
                command = 'study.{}.{}'.format(name, key)
            monkeypatch.setitem(resource, key, eval(command))

    for name in resource_names:
        resource = name_to_resource(test_datapackage, name)
        assert resource['path'] == '{}.csv.gz'.format(name)

    # Saving bumps the patch version
    version = semantic_version.Version(study.version)
    version.patch += 1
    assert str(version) == test_datapackage['datapackage_version']
    assert study_name == test_datapackage['name']

    datapackage_keys_to_ignore = ['name', 'datapackage_version', 'resources']
    datapackages = (true_datapackage, test_datapackage)

    for name in resource_names:
        for datapackage in datapackages:
            resource = name_to_resource(datapackage, name)
            for key in resource_keys_to_ignore:
                monkeypatch.delitem(resource, key, raising=False)

    # Have to check the resources separately because they could be in any
    # order; it just matters that the contents are equal
    sorted_true = sorted(true_datapackage['resources'],
                         key=lambda x: x['name'])
    sorted_test = sorted(test_datapackage['resources'],
                         key=lambda x: x['name'])
    assert len(sorted_true) == len(sorted_test)
    for true_resource, test_resource in zip(sorted_true, sorted_test):
        pdt.assert_equal(sorted(true_resource.items()),
                         sorted(test_resource.items()))

    for key in datapackage_keys_to_ignore:
        for datapackage in datapackages:
            monkeypatch.delitem(datapackage, key)

    pdt.assert_dict_equal(test_datapackage, true_datapackage)
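# For reference: name_to_resource, imported from flotilla.datapackage in the
# tests above, is assumed to select a resource dict from the datapackage by
# its 'name' field. A minimal equivalent sketch (hypothetical name, not the
# library function itself):
def _name_to_resource_sketch(datapackage, name):
    for resource in datapackage['resources']:
        if resource['name'] == name:
            return resource
    raise ValueError('No resource named "{}" in datapackage'.format(name))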