def test_isotope_images(dataset: SMDataset):
    """Isotope images for the first annotation are returned as NumPy arrays.

    The annotations are requested with empty ``neutralLoss`` / ``chemMod``
    filters and the first entry is unpacked as a (formula, adduct) pair.
    """
    first_annotation = dataset.annotations(neutralLoss='', chemMod='')[0]
    sum_formula, adduct_str = first_annotation

    isotope_imgs = dataset.isotope_images(sum_formula, adduct_str)

    # More than one image is expected for the ion.
    assert len(isotope_imgs) > 1
    assert isinstance(isotope_imgs[0], np.ndarray)
def test_results_with_coloc(dataset: SMDataset):
    """Results requested with ``coloc_with`` are non-empty and have truthy coefficients."""
    query = dict(database=('HMDB', 'v4'), fdr=0.5)

    # The first ion of the plain results serves as the colocalization reference.
    reference_ion = dataset.results(**query).ion[0]
    colocalized = dataset.results(coloc_with=reference_ion, **query)

    assert len(colocalized) > 0
    assert colocalized.colocCoeff.all()
def test_map_database_works_handles_strs_ids_from_api(mock_getAnnotations,
                                                      mock_get_databases,
                                                      dataset: SMDataset):
    """Forward-compatibility check for string database IDs.

    After calling ``dataset.results()`` the keyword arguments of the last call
    recorded on ``mock_getAnnotations`` must carry ``databaseId == '22'`` in
    their ``annotationFilter``. ``mock_get_databases`` is accepted but not
    inspected here.
    """
    dataset.results()

    recorded_call = mock_getAnnotations.call_args
    print(recorded_call)
    # Index 1 of the recorded call holds the keyword arguments.
    recorded_filter = recorded_call[1]['annotationFilter']
    assert recorded_filter['databaseId'] == '22'
def test_diagnostics(dataset: SMDataset):
    """Diagnostics are available as a list, by type, and as a TIC image."""
    all_diagnostics = dataset.diagnostics()
    tic = dataset.diagnostic('TIC')
    imzml_metadata = dataset.diagnostic('IMZML_METADATA')
    tic_img = dataset.tic_image()

    # The full listing must contain an entry of type 'TIC'.
    assert 'TIC' in (entry['type'] for entry in all_diagnostics)
    first_tic_image = tic['images'][0]['image']
    assert isinstance(first_tic_image, np.ndarray)
    assert imzml_metadata is not None
    assert isinstance(tic_img, np.ndarray)
def test_isotope_images_advanced(advanced_dataset: SMDataset):
    """Isotope images can be fetched for an annotation with a chem mod and a neutral loss."""
    requested_fields = ('sumFormula', 'chemMod', 'neutralLoss', 'adduct')
    matching = advanced_dataset.annotations(
        return_vals=requested_fields,
        neutralLoss='-H2O',
        chemMod='-H+C',
    )
    # Values come back in the order given by ``requested_fields``.
    formula, chem_mod, neutral_loss, adduct_str = matching[0]

    isotope_imgs = advanced_dataset.isotope_images(
        formula, adduct_str, chem_mod=chem_mod, neutral_loss=neutral_loss)

    assert len(isotope_imgs) > 1
    assert isinstance(isotope_imgs[0], np.ndarray)
示例#6
0
def fetch_data_from_metaspace(pol, fdr):
    """Gather colocalization data for every dataset of one polarity.

    Each dataset ID is passed, together with ``fdr``, to ``get_ds_data`` in a
    pool of 8 worker processes. Datasets whose result is ``None`` are skipped.

    Args:
        pol: Polarity selector; 'POSITIVE' datasets are queried when it equals
            ``POS``, 'NEGATIVE' ones otherwise.
        fdr: Forwarded unchanged to every ``get_ds_data`` call.

    Returns:
        A pair ``(all_coloc, (ds_ids, all_ranges, all_mz_dict, ion_present_in_ds))``
        where ``all_coloc`` is the ``pd.concat`` of the per-dataset coloc
        results and ``ion_present_in_ds`` is a set of ``(n, mol)`` tuples,
        ``n`` being the position of the dataset among the non-empty ones.
    """
    polarity = 'POSITIVE' if pol == POS else 'NEGATIVE'
    datasets = [
        SMDataset(info, sm._gqclient)
        for info in sm._gqclient.getDatasets({'polarity': polarity})
    ]

    coloc_frames = []
    mz_ranges = []
    mz_lookup = {}
    present_ions = set()
    kept_ds_ids = []

    with ProcessPoolExecutor(8) as pool:
        results = pool.map(get_ds_data, [ds.id for ds in datasets],
                           repeat(fdr))
        for idx, (ds, ds_result) in enumerate(zip(datasets, results)):
            if idx % 100 == 0:
                print(pol, idx, 'of', len(datasets))
            if ds_result is None:
                continue

            kept_ds_ids.append(ds.id)
            coloc, mz_range, mz_dict = ds_result
            # Keep a consistent index of which non-empty DS we are at
            nonempty_idx = len(coloc_frames)
            coloc_frames.append(coloc)
            mz_ranges.append(mz_range)
            mz_lookup.update(mz_dict)
            for molecules in (coloc.source, coloc.target):
                present_ions.update((nonempty_idx, mol) for mol in molecules)

    gc.collect()
    combined_coloc = pd.concat(coloc_frames, ignore_index=True)

    return combined_coloc, (kept_ds_ids, mz_ranges, mz_lookup, present_ions)
def test_all_annotation_images(dataset: SMDataset):
    """``all_annotation_images(only_first_isotope=True)`` yields one image per annotation.

    Each returned element must be an ``IsotopeImages`` holding exactly one
    image, and that image must be a NumPy array.
    """
    image_list = dataset.all_annotation_images(only_first_isotope=True)

    # Check non-emptiness before indexing so that an empty result fails with a
    # clear assertion instead of an IndexError.
    assert len(image_list) > 0
    assert isinstance(image_list[0], IsotopeImages)
    assert all(len(isotope_images) == 1 for isotope_images in image_list)
    assert isinstance(image_list[0][0], np.ndarray)
示例#8
0
def fetch_data_from_metaspace(is_pos, coloc_filename, data_filename):
    """Fetch colocalization data for all datasets of one polarity and pickle it.

    Every dataset returned by ``sm._gqclient.getDatasets`` for the chosen
    polarity is processed by ``get_ds_data`` in a pool of 8 worker processes.
    Datasets for which ``get_ds_data`` returns ``None`` are skipped.

    Args:
        is_pos: Truthy to query 'POSITIVE' datasets, falsy for 'NEGATIVE'.
        coloc_filename: Path the concatenated colocalization data is pickled to.
        data_filename: Path the tuple
            ``(ds_ids, all_ranges, all_mz_dict, ion_present_in_ds)`` is
            pickled to.

    Returns:
        None. The results are only written to the two files.
    """
    all_coloc = []
    all_ranges = []
    all_mz_dict = {}
    ion_present_in_ds = set()
    datasets = [
        SMDataset(info, sm._gqclient) for info in sm._gqclient.getDatasets(
            {'polarity': 'POSITIVE' if is_pos else 'NEGATIVE'})
    ]
    ds_ids = []
    with ProcessPoolExecutor(8) as ex:
        for i, result in enumerate(
                ex.map(get_ds_data, [ds.id for ds in datasets])):
            # Progress report every 100 datasets.
            if i % 100 == 0:
                print("pos" if is_pos else "neg", i, 'of', len(datasets))
            if result is not None:
                ds_ids.append(datasets[i].id)
                coloc, mz_range, mz_dict = result
                n = len(
                    all_coloc
                )  # Keep a consistent index of which non-empty DS we are at
                all_coloc.append(coloc)
                all_ranges.append(mz_range)
                all_mz_dict.update(mz_dict)
                ion_present_in_ds.update((n, mol) for mol in coloc.source)
                ion_present_in_ds.update((n, mol) for mol in coloc.target)

    all_coloc = pd.concat(all_coloc, ignore_index=True)
    print('saving', coloc_filename)
    all_coloc.to_pickle(coloc_filename)
    # Drop the concatenated data before serializing the remaining structures.
    del all_coloc
    gc.collect()
    # Use a context manager so the file handle is flushed and closed
    # deterministically, even if pickling raises.
    with open(data_filename, 'wb') as data_file:
        pickle.dump((ds_ids, all_ranges, all_mz_dict, ion_present_in_ds),
                    data_file)
def test_results_neutral_loss_chem_mod(advanced_dataset: SMDataset):
    """
    Chem mods / neutral losses only show up in results when explicitly requested.

    Test setup: the dataset is expected to have been created with a -H2O
    neutral loss and a -H+C chem mod.
    """
    base_query = dict(database=('HMDB', 'v4'), fdr=0.5)

    def modified_rows(frame, level):
        # Rows whose index value at ``level`` is not the empty string.
        return frame[frame.index.get_level_values(level) != '']

    plain = advanced_dataset.results(**base_query)
    with_cm = advanced_dataset.results(include_chem_mods=True, **base_query)
    with_nl = advanced_dataset.results(include_neutral_losses=True,
                                       **base_query)
    with_both = advanced_dataset.results(include_chem_mods=True,
                                         include_neutral_losses=True,
                                         **base_query)

    # The index gains one level per requested modification kind
    assert list(with_cm.index.names) == ['formula', 'adduct', 'chemMod']
    assert list(with_nl.index.names) == ['formula', 'adduct', 'neutralLoss']
    assert list(with_both.index.names) == [
        'formula', 'adduct', 'chemMod', 'neutralLoss'
    ]

    # Modified annotations are present when explicitly included
    assert len(modified_rows(with_cm, 'chemMod')) > 0
    assert len(modified_rows(with_nl, 'neutralLoss')) > 0
    assert len(modified_rows(with_both, 'chemMod')) > 0
    assert len(modified_rows(with_both, 'neutralLoss')) > 0

    # Modified annotations are excluded unless explicitly included
    for frame in (plain, with_cm, with_nl, with_both):
        assert frame.index.is_unique
    assert len(plain) < len(with_cm) < len(with_both)
    assert len(plain) < len(with_nl) < len(with_both)

    # The unmodified subset of the full result must match the plain result
    flattened = with_both.reset_index(['chemMod', 'neutralLoss'])
    unmodified = flattened[(flattened.chemMod == '')
                           & (flattened.neutralLoss == '')]
    assert set(plain.index) == set(unmodified.index)
示例#10
0
def test_all_annotation_images_advanced(advanced_dataset: SMDataset):
    """Images come back for annotations both with and without chem mods / neutral losses."""
    image_list = advanced_dataset.all_annotation_images(
        only_first_isotope=True)

    # At least one entry has a chem mod, and at least one has none
    has_chem_mod = [bool(entry.chem_mod) for entry in image_list]
    assert any(has_chem_mod)
    assert not all(has_chem_mod)

    # Same for neutral losses
    has_neutral_loss = [bool(entry.neutral_loss) for entry in image_list]
    assert any(has_neutral_loss)
    assert not all(has_neutral_loss)
示例#11
0
def test_all_annotation_images_tic(dataset: SMDataset):
    """TIC-scaled first-isotope images are non-negative and do not greatly exceed the TIC.

    All available first-isotope images (FDR 0.5, ``scale_intensity='TIC'``)
    are summed per pixel and the sums are range-checked.
    """
    image_list = dataset.all_annotation_images(only_first_isotope=True,
                                               scale_intensity='TIC',
                                               fdr=0.5)

    # np.stack requires a real sequence: passing a generator has been
    # deprecated since NumPy 1.16 and is rejected by newer releases, so the
    # images are collected into a list first.
    all_images = np.stack([
        images[0] for images in image_list if images[0] is not None
    ])
    pixel_sums = np.sum(all_images, axis=0)
    # Only keep pixels that are not NaN in the first image
    # (presumably NaN marks pixels without data - TODO confirm).
    pixel_sums = pixel_sums[~np.isnan(all_images[0])]
    # The sum of annotations generally shouldn't substantially exceed the TIC
    assert (pixel_sums < 1.5).all()
    assert (pixel_sums >= 0).all()  # There should be no negative values
    assert (pixel_sums > 0).any()  # There should be positive values
示例#12
0
def test_isotope_images_scaling(dataset: SMDataset):
    """Check the maximum of the first isotope image under each scaling / clipping option."""
    first_row = dataset.results(neutralLoss='', chemMod='').iloc[0]
    formula, adduct = first_row.name

    def first_image(**options):
        # First isotope image of the selected ion with the given options.
        return dataset.isotope_images(formula, adduct, **options)[0]

    scaled = first_image()
    unscaled = first_image(scale_intensity=False)
    clipped = first_image(hotspot_clipping=True)
    clipped_unscaled = first_image(scale_intensity=False,
                                   hotspot_clipping=True)

    assert np.max(scaled) == pytest.approx(first_row.intensity)
    assert np.max(unscaled) == pytest.approx(1)
    assert np.max(clipped) < first_row.intensity
    # Somewhat arbitrary, but generally holds true
    assert np.max(clipped) > first_row.intensity / 2
    assert np.max(clipped_unscaled) == pytest.approx(1)
示例#13
0
def test_results_with_str_database_id(dataset: SMDataset):
    """Results can be requested with the database ID given as an int-string.

    The type of database IDs was undecided for a while, so both ints and
    int-strings are accepted and converted to the correct form internally.
    """
    str_id_results = dataset.results('22', fdr=0.5)

    assert len(str_id_results) > 0
示例#14
0
def test_results_with_int_database_id(dataset: SMDataset):
    """Results can be requested with the database ID given as an int."""
    int_id_results = dataset.results(22, fdr=0.5)

    assert len(int_id_results) > 0
示例#15
0
def test_results(dataset: SMDataset):
    """Results are non-empty, contain the expected columns, and are indexed by formula and adduct."""
    results_df = dataset.results(database=('HMDB', 'v4'), fdr=0.5)

    assert len(results_df) > 0
    # Every expected column must be present
    missing_cols = [
        col for col in EXPECTED_RESULTS_COLS if col not in results_df.columns
    ]
    assert not missing_cols
    assert list(results_df.index.names) == ['formula', 'adduct']
示例#16
0
def test_annotations(dataset: SMDataset):
    """``annotations()`` returns a non-empty collection of two-element entries."""
    annotation_list = dataset.annotations()

    assert len(annotation_list) > 0
    first_entry = annotation_list[0]
    assert len(first_entry) == 2  # sf, adduct tuple