def test_isotope_images(dataset: SMDataset):
    """Isotope images of a plain annotation come back as several numpy arrays.

    The annotation list is queried with empty ``neutralLoss`` and ``chemMod``
    filters, and its first entry is unpacked into a sum formula and an adduct.
    """
    first_annotation = dataset.annotations(neutralLoss='', chemMod='')[0]
    sum_formula, adduct = first_annotation

    isotope_imgs = dataset.isotope_images(sum_formula, adduct)

    # More than one image is expected, and the first must be an ndarray.
    assert len(isotope_imgs) > 1
    assert isinstance(isotope_imgs[0], np.ndarray)
def test_results_with_coloc(dataset: SMDataset):
    """Results requested with ``coloc_with`` are non-empty and carry truthy coefficients."""
    query = {'database': ('HMDB', 'v4'), 'fdr': 0.5}

    # The first ion of the plain results serves as the colocalization reference.
    reference_ion = dataset.results(**query).ion[0]
    colocalized = dataset.results(coloc_with=reference_ion, **query)

    assert len(colocalized) > 0
    # Every colocCoeff value must be truthy (i.e. non-zero).
    assert colocalized.colocCoeff.all()
def test_map_database_works_handles_strs_ids_from_api(mock_getAnnotations,
                                                      mock_get_databases,
                                                      dataset: SMDataset):
    """Check that ``results()`` passes the database ID on as the string ``'22'``.

    This test only exists to pin the forward-compatibility behaviour with
    string database IDs.

    NOTE(review): the two mock arguments are presumably injected by patch
    decorators or fixtures that are not visible in this block - confirm.
    """
    dataset.results()

    recorded_call = mock_getAnnotations.call_args
    print(recorded_call)

    # Element 1 of call_args holds the keyword arguments of the latest call.
    sent_filter = recorded_call[1]['annotationFilter']
    assert sent_filter['databaseId'] == '22'
def test_diagnostics(dataset: SMDataset):
    """Exercise the diagnostics accessors: full list, single lookup and TIC image."""
    every_diagnostic = dataset.diagnostics()
    tic = dataset.diagnostic('TIC')
    imzml_metadata = dataset.diagnostic('IMZML_METADATA')
    tic_img = dataset.tic_image()

    # The full listing must contain an entry of type 'TIC'.
    assert any(entry['type'] == 'TIC' for entry in every_diagnostic)

    # The TIC diagnostic exposes its first image as a numpy array.
    first_tic_image = tic['images'][0]['image']
    assert isinstance(first_tic_image, np.ndarray)

    assert imzml_metadata is not None
    assert isinstance(tic_img, np.ndarray)
def test_isotope_images_advanced(advanced_dataset: SMDataset):
    """Isotope images can be fetched for an annotation with a chem mod and a neutral loss.

    The first annotation matching ``neutralLoss='-H2O'`` and ``chemMod='-H+C'``
    is selected, and its modifiers are passed through to ``isotope_images``.
    """
    matching = advanced_dataset.annotations(
        return_vals=('sumFormula', 'chemMod', 'neutralLoss', 'adduct'),
        neutralLoss='-H2O',
        chemMod='-H+C',
    )
    sum_formula, chem_mod, neutral_loss, adduct = matching[0]

    isotope_imgs = advanced_dataset.isotope_images(
        sum_formula,
        adduct,
        chem_mod=chem_mod,
        neutral_loss=neutral_loss,
    )

    assert len(isotope_imgs) > 1
    assert isinstance(isotope_imgs[0], np.ndarray)
def fetch_data_from_metaspace(pol, fdr):
    """Gather ``get_ds_data`` results for every dataset of one polarity.

    Datasets are listed through ``sm._gqclient`` with a polarity filter of
    'POSITIVE' when ``pol == POS`` and 'NEGATIVE' otherwise. Each dataset ID is
    handed, together with ``fdr``, to ``get_ds_data`` in a pool of 8 worker
    processes. Datasets whose result is ``None`` are skipped.

    Returns:
        A tuple ``(coloc, (ds_ids, ranges, mz_dict, ion_present_in_ds))`` where
        ``coloc`` is the concatenation of all per-dataset coloc frames,
        ``ds_ids`` and ``ranges`` are lists aligned with the non-empty
        datasets, ``mz_dict`` is the merged per-dataset dict, and
        ``ion_present_in_ds`` is a set of ``(dataset_index, ion)`` pairs built
        from the ``source`` and ``target`` attributes of each coloc frame.
    """
    client = sm._gqclient
    polarity = 'POSITIVE' if pol == POS else 'NEGATIVE'
    datasets = [
        SMDataset(info, client)
        for info in client.getDatasets({'polarity': polarity})
    ]

    coloc_frames = []
    mz_ranges = []
    merged_mz = {}
    ion_presence = set()
    kept_ds_ids = []

    with ProcessPoolExecutor(8) as pool:
        outcomes = pool.map(get_ds_data, [ds.id for ds in datasets], repeat(fdr))
        for position, (ds, outcome) in enumerate(zip(datasets, outcomes)):
            # Progress report every 100 datasets.
            if position % 100 == 0:
                print(pol, position, 'of', len(datasets))
            if outcome is None:
                continue

            kept_ds_ids.append(ds.id)
            coloc, mz_range, mz_dict = outcome
            # Index among the non-empty datasets only, so it stays aligned
            # with coloc_frames / mz_ranges / kept_ds_ids.
            slot = len(coloc_frames)
            coloc_frames.append(coloc)
            mz_ranges.append(mz_range)
            merged_mz.update(mz_dict)
            for ions in (coloc.source, coloc.target):
                ion_presence.update((slot, mol) for mol in ions)

    gc.collect()
    combined = pd.concat(coloc_frames, ignore_index=True)
    return combined, (kept_ds_ids, mz_ranges, merged_mz, ion_presence)
def test_all_annotation_images(dataset: SMDataset):
    """``all_annotation_images(only_first_isotope=True)`` yields single-image ``IsotopeImages``.

    Checks that the result is non-empty, that entries are ``IsotopeImages``
    instances holding exactly one image each, and that the image itself is a
    numpy array.
    """
    image_list = dataset.all_annotation_images(only_first_isotope=True)

    # Verify non-emptiness before indexing, so an empty result is reported as
    # a failed assertion instead of an IndexError from image_list[0].
    assert len(image_list) > 0
    assert isinstance(image_list[0], IsotopeImages)
    assert all(len(isotope_images) == 1 for isotope_images in image_list)
    assert isinstance(image_list[0][0], np.ndarray)
def fetch_data_from_metaspace(is_pos, coloc_filename, data_filename):
    """Gather ``get_ds_data`` results for one polarity and pickle them to disk.

    Datasets are listed through ``sm._gqclient`` with a polarity filter of
    'POSITIVE' when ``is_pos`` is truthy and 'NEGATIVE' otherwise. Each dataset
    ID is passed to ``get_ds_data`` in a pool of 8 worker processes. Datasets
    whose result is ``None`` are skipped.

    Args:
        is_pos: Selects positive (truthy) or negative (falsy) polarity.
        coloc_filename: Destination for the concatenated coloc frame, written
            with ``DataFrame.to_pickle``.
        data_filename: Destination for the pickled tuple
            ``(ds_ids, all_ranges, all_mz_dict, ion_present_in_ds)``.

    Returns:
        None. The results are only written to the two files.
    """
    all_coloc = []
    all_ranges = []
    all_mz_dict = {}
    ion_present_in_ds = set()
    datasets = [
        SMDataset(info, sm._gqclient)
        for info in sm._gqclient.getDatasets(
            {'polarity': 'POSITIVE' if is_pos else 'NEGATIVE'})
    ]
    ds_ids = []

    with ProcessPoolExecutor(8) as ex:
        results = ex.map(get_ds_data, [ds.id for ds in datasets])
        for i, result in enumerate(results):
            # Progress report every 100 datasets.
            if i % 100 == 0:
                print("pos" if is_pos else "neg", i, 'of', len(datasets))
            if result is not None:
                ds_ids.append(datasets[i].id)
                coloc, mz_range, mz_dict = result
                # Keep a consistent index of which non-empty DS we are at
                n = len(all_coloc)
                all_coloc.append(coloc)
                all_ranges.append(mz_range)
                all_mz_dict.update(mz_dict)
                ion_present_in_ds.update((n, mol) for mol in coloc.source)
                ion_present_in_ds.update((n, mol) for mol in coloc.target)

    all_coloc = pd.concat(all_coloc, ignore_index=True)
    print('saving', coloc_filename)
    all_coloc.to_pickle(coloc_filename)

    # Drop the concatenated frame before serialising the remaining data
    # (presumably to keep peak memory down).
    del all_coloc
    gc.collect()

    # Use a context manager so the output file is flushed and closed even if
    # pickling fails; the handle was previously never closed explicitly.
    with open(data_filename, 'wb') as data_file:
        pickle.dump((ds_ids, all_ranges, all_mz_dict, ion_present_in_ds),
                    data_file)
def test_results_neutral_loss_chem_mod(advanced_dataset: SMDataset):
    """
    Results only include chem mods / neutral losses when explicitly requested.

    Test setup: the dataset is expected to have been created with a -H2O
    neutral loss and a -H+C chem mod.
    """
    base_query = {'database': ('HMDB', 'v4'), 'fdr': 0.5}
    plain = advanced_dataset.results(**base_query)
    with_cm = advanced_dataset.results(include_chem_mods=True, **base_query)
    with_nl = advanced_dataset.results(include_neutral_losses=True, **base_query)
    with_cm_nl = advanced_dataset.results(
        include_chem_mods=True, include_neutral_losses=True, **base_query
    )

    def count_modified(frame, level):
        """Number of rows whose index value at ``level`` is not the empty string."""
        return len(frame[frame.index.get_level_values(level) != ''])

    # Check expected index levels
    base_levels = ['formula', 'adduct']
    assert list(with_cm.index.names) == base_levels + ['chemMod']
    assert list(with_nl.index.names) == base_levels + ['neutralLoss']
    assert list(with_cm_nl.index.names) == base_levels + ['chemMod', 'neutralLoss']

    # Check CMs / NLs are present when explicitly included
    assert count_modified(with_cm, 'chemMod') > 0
    assert count_modified(with_nl, 'neutralLoss') > 0
    assert count_modified(with_cm_nl, 'chemMod') > 0
    assert count_modified(with_cm_nl, 'neutralLoss') > 0

    # Check CMs / NLs are excluded if they're not explicitly included
    for frame in (plain, with_cm, with_nl, with_cm_nl):
        assert frame.index.is_unique
    assert len(plain) < len(with_cm) < len(with_cm_nl)
    assert len(plain) < len(with_nl) < len(with_cm_nl)

    # Rows of the fullest result set without any modifier must match the
    # plain result set exactly.
    flattened = with_cm_nl.reset_index(['chemMod', 'neutralLoss'])
    unmodified = flattened[(flattened.chemMod == '') & (flattened.neutralLoss == '')]
    assert set(plain.index) == set(unmodified.index)
def test_all_annotation_images_advanced(advanced_dataset: SMDataset):
    """Images are returned for annotations both with and without CMs / NLs."""
    image_sets = advanced_dataset.all_annotation_images(only_first_isotope=True)

    # For each modifier attribute, at least one image set must have it set
    # and at least one must have it unset.
    for modifier in ('chem_mod', 'neutral_loss'):
        assert any(getattr(image_set, modifier) for image_set in image_sets)
        assert any(not getattr(image_set, modifier) for image_set in image_sets)
def test_all_annotation_images_tic(dataset: SMDataset):
    """TIC-scaled annotation images sum to plausible per-pixel values.

    Fetches first-isotope images with ``scale_intensity='TIC'`` and
    ``fdr=0.5``, stacks the non-``None`` ones and sums them per pixel.
    """
    image_list = dataset.all_annotation_images(
        only_first_isotope=True, scale_intensity='TIC', fdr=0.5
    )

    # np.stack requires a real sequence: passing a generator was deprecated in
    # NumPy 1.16 and is rejected by newer releases, so build a list first.
    first_isotopes = [
        images[0] for images in image_list if images[0] is not None
    ]
    assert first_isotopes, 'no non-empty annotation images were returned'

    all_images = np.stack(first_isotopes)
    pixel_sums = np.sum(all_images, axis=0)
    # Ignore pixels that are NaN in the first image.
    # NOTE(review): presumably these are masked / off-sample pixels - confirm.
    pixel_sums = pixel_sums[~np.isnan(all_images[0])]

    # The sum of annotations generally shouldn't substantially exceed the TIC
    assert (pixel_sums < 1.5).all()
    assert (pixel_sums >= 0).all()  # There should be no negative values
    assert (pixel_sums > 0).any()  # There should be positive values
def test_isotope_images_scaling(dataset: SMDataset):
    """Check peak values of isotope images under each scaling / clipping option.

    The first plain annotation (empty ``neutralLoss`` and ``chemMod``) is used
    as the reference; its ``intensity`` is compared with the image maxima.
    """
    reference = dataset.results(neutralLoss='', chemMod='').iloc[0]
    formula, adduct = reference.name

    def first_image(**options):
        """Return the first isotope image of the reference annotation."""
        return dataset.isotope_images(formula, adduct, **options)[0]

    default_img = first_image()
    raw_img = first_image(scale_intensity=False)
    clipped_img = first_image(hotspot_clipping=True)
    clipped_raw_img = first_image(scale_intensity=False, hotspot_clipping=True)

    # By default the maximum matches the annotation intensity; with
    # scale_intensity=False the maximum is 1.
    assert np.max(default_img) == pytest.approx(reference.intensity)
    assert np.max(raw_img) == pytest.approx(1)

    # With hotspot clipping the peak falls below the annotation intensity...
    clipped_peak = np.max(clipped_img)
    assert clipped_peak < reference.intensity
    # ...but not below half of it. Somewhat arbitrary, but generally holds true
    assert clipped_peak > reference.intensity / 2

    assert np.max(clipped_raw_img) == pytest.approx(1)
def test_results_with_str_database_id(dataset: SMDataset):
    """Results can be requested with the database ID given as an int-string.

    The type of database IDs was up in the air for a while. Both ints and
    int-strings are accepted and are converted to the correct form internally.
    """
    database_id = '22'
    found = dataset.results(database_id, fdr=0.5)
    assert len(found) > 0
def test_results_with_int_database_id(dataset: SMDataset):
    """Results can be requested with the database ID given as an int."""
    database_id = 22
    found = dataset.results(database_id, fdr=0.5)
    assert len(found) > 0
def test_results(dataset: SMDataset):
    """Results for HMDB v4 at FDR 0.5 are non-empty and have the expected layout."""
    found = dataset.results(database=('HMDB', 'v4'), fdr=0.5)

    assert len(found) > 0

    # Every expected column must be present in the result frame.
    missing_cols = [
        col for col in EXPECTED_RESULTS_COLS if col not in found.columns
    ]
    assert not missing_cols

    # Rows are indexed by (formula, adduct).
    assert list(found.index.names) == ['formula', 'adduct']
def test_annotations(dataset: SMDataset):
    """``annotations()`` returns a non-empty collection of two-element entries."""
    found = dataset.annotations()
    assert len(found) > 0

    first_entry = found[0]
    assert len(first_entry) == 2  # sf, adduct tuple