Exemplo n.º 1
0
 def test_is_not_empty_if_annotated(self):
     """Check the table has one row and a count of one for a single linked spectrum.

     Saves one molecule, a standard for it, an empty dataset and one
     fragmentation spectrum that references both the dataset and the
     standard, then inspects what ``get_table_and_count`` returns.
     """
     molecule = Molecule(name='test', sum_formula="C1H2O3")
     molecule.save()
     standard = Standard(molecule=molecule)
     standard.save()
     dataset = Dataset()
     dataset.save()
     # The spectrum is attached to the standard as well as to the dataset.
     spectrum = FragmentationSpectrum(ms1_intensity=42, dataset=dataset, standard=standard)
     spectrum.save()
     table, spectra_count = self.get_table_and_count()
     self.assertEqual(len(table.rows), 1)
     self.assertEqual(spectra_count, 1)
Exemplo n.º 2
0
 def test_make_FragmentationSpectrum_with_centroids(self):
     """Check centroid m/z values and intensities can be read back after saving.

     The values are written through ``set_centroid_mzs`` /
     ``set_centroid_ints`` and compared against the ``centroid_mzs`` /
     ``centroid_ints`` attributes of the same object once it is saved.
     """
     expected_mzs = [10., 20, 50]
     expected_ints = [1., 1., 1.]
     dataset = Dataset(name='Dataset1')
     dataset.save()
     spectrum = FragmentationSpectrum(precursor_mz='123.456',
                                      spec_num=0, dataset=dataset)
     spectrum.set_centroid_mzs(expected_mzs)
     spectrum.set_centroid_ints(expected_ints)
     spectrum.save()
     np.testing.assert_array_almost_equal(expected_mzs, spectrum.centroid_mzs)
     np.testing.assert_array_almost_equal(expected_ints, spectrum.centroid_ints)
Exemplo n.º 3
0
def handle_uploaded_files(metadata, mzml_filepath, d):
    """Process an uploaded mzML file and store its XICs and MS/MS spectra.

    For every selected standard/adduct pair the expected m/z is computed
    and, while iterating over the run:

    * each MS1 spectrum contributes one point to the pair's extracted ion
      chromatogram (summed centroided intensity inside the ppm window);
    * each MS2 spectrum whose precursor m/z lies within the ppm tolerance
      or the quadrupole window of the pair is saved as a
      ``FragmentationSpectrum``.

    Finally one ``Xic`` per standard/adduct pair is saved and the dataset
    is flagged as processed.

    Args:
        metadata: mapping read with the keys ``'mass_accuracy_ppm'``,
            ``'quad_window_mz'``, ``'standards'`` and ``'adducts'``
            (primary keys), plus the optional comma-separated strings
            ``'lc_info'`` and ``'ms_info'``.
        mzml_filepath: path handed to ``pymzml.run.Reader``.
        d: dataset object the results are attached to (presumably a
            ``Dataset`` instance -- confirm against the caller). It is
            modified and saved.

    Returns:
        True once processing has finished.
    """
    logger = logging.getLogger(__file__ + str(d.id))
    logger.addHandler(DatabaseLogHandler(d, level=logging.ERROR))

    msrun = pymzml.run.Reader(mzml_filepath)
    ppm = float(metadata['mass_accuracy_ppm'])
    mz_tol_quad = float(metadata['quad_window_mz'])
    standards = Standard.objects.all().filter(pk__in=metadata['standards'])
    adducts = Adduct.objects.all().filter(pk__in=metadata['adducts'])
    logger.debug(standards.count())

    # One record per standard/adduct pair. The XIC list is created up front
    # so that a run without spectra still yields (empty) XICs instead of
    # failing with a KeyError when they are saved.
    targets = []
    for standard in standards:
        for adduct in adducts:
            target_mz = standard.molecule.get_mz(adduct)
            logger.debug(standard)
            logger.debug(target_mz)
            tol = target_mz * ppm * 1e-6
            targets.append({
                'standard': standard,
                'adduct': adduct,
                'mz': target_mz,
                'tol': tol,
                'lower': target_mz - tol,
                'upper': target_mz + tol,
                'xic': [],
            })

    logger.debug('adding dataset')
    # Each key is looked up on its own so that a missing 'ms_info' does not
    # discard a supplied 'lc_info' (and vice versa).
    for key, model, relation in (('lc_info', LcInfo, d.lc_info),
                                 ('ms_info', MsInfo, d.ms_info)):
        try:
            raw_info = metadata[key]
        except LookupError:
            logger.debug('no instrument information supplied; using empty string instead')
            raw_info = ''
        for content in _parse_instrument_info(raw_info):
            info_obj = model.objects.get_or_create(content=content)[0]
            if info_obj not in relation.all():
                relation.add(info_obj)

    d.mass_accuracy_ppm = ppm
    d.save()
    for standard in standards:
        d.standards_present.add(standard)
    for adduct in adducts:
        d.adducts_present.add(adduct)
    d.save()

    logger.debug('adding msms')
    scan_time = []
    for spec_n, spectrum in enumerate(msrun, start=1):
        ms_level = spectrum['ms level']
        if ms_level == 1:
            scan_time.append(spectrum['scan start time'])
            peaks = spectrum.centroidedPeaks
            for target in targets:
                target['xic'].append(
                    _window_intensity(peaks, target['lower'], target['upper']))
        elif ms_level == 2 and targets:
            pre_mz = float(spectrum['precursors'][0]['mz'])
            for target in targets:
                offset = abs(pre_mz - target['mz'])
                # Within either tolerance: the fragmentation spectrum is
                # probably the target.
                if not (offset <= target['tol'] or offset <= mz_tol_quad):
                    continue
                _store_fragmentation_spectrum(
                    spectrum, spec_n, pre_mz, target, mz_tol_quad, d, logger)

    logger.debug("adding xics")
    for target in targets:
        x = Xic(mz=target['mz'], dataset=d)
        x.set_xic(target['xic'])
        x.set_rt(scan_time)
        x.standard = target['standard']
        x.adduct = target['adduct']
        x.save()
    d.processing_finished = True
    d.save()
    logger.debug('done')
    logger.debug("added = True")
    return True


def _parse_instrument_info(raw_info):
    """Return the set of non-empty entries of a ', '-separated string."""
    entries = set()
    for part in raw_info.split(', '):
        cleaned = part.replace(',', '').strip()
        # Skip fragments that are only commas/whitespace so that no record
        # with empty content is created.
        if cleaned:
            entries.add(cleaned)
    return entries


def _window_intensity(peaks, lower, upper):
    """Sum the intensities of (m/z, intensity) pairs with lower <= m/z <= upper."""
    return sum(intensity for m, intensity in peaks if lower <= m <= upper)


def _collision_energy_label(spectrum):
    """Return '<name> <value> <unitName>' of the MS:1000045 element, or ''.

    MS:1000045 is the PSI-MS accession for "collision energy". If several
    elements carry it the last one wins; attributes that are absent are
    left out of the label instead of raising.
    """
    attributes = {}
    for element in spectrum.xmlTree:
        if element.get('accession') == "MS:1000045":
            attributes = dict(element.items())
    parts = [attributes.get(key, '') for key in ('name', 'value', 'unitName')]
    return ' '.join(part for part in parts if part)


def _store_fragmentation_spectrum(spectrum, spec_n, pre_mz, target, mz_tol_quad, d, logger):
    """Save one MS2 spectrum that matched ``target`` as a FragmentationSpectrum."""
    if target['xic']:
        ms1_intensity = target['xic'][-1]
    else:
        # MS2 scan seen before any MS1 scan: there is no survey intensity yet.
        logger.warning('spectrum %s: no preceding MS1 scan; using ms1_intensity=0', spec_n)
        ms1_intensity = 0

    mzs = spectrum.mz
    ints = spectrum.i
    quad_ints_sum = _window_intensity(
        zip(mzs, ints), pre_mz - mz_tol_quad, pre_mz + mz_tol_quad)
    ppm_ints_sum = _window_intensity(
        zip(mzs, ints), pre_mz - target['tol'], pre_mz + target['tol'])
    # Guard the denominator as well as the numerator; float() keeps this a
    # true division even for integer intensities.
    if ppm_ints_sum == 0 or quad_ints_sum == 0:
        pre_fraction = 0
    else:
        pre_fraction = float(ppm_ints_sum) / quad_ints_sum

    ce_str = _collision_energy_label(spectrum)
    f = FragmentationSpectrum(precursor_mz=pre_mz,
                              rt=spectrum['scan start time'], dataset=d, spec_num=spec_n,
                              precursor_quad_fraction=pre_fraction, ms1_intensity=ms1_intensity,
                              collision_energy=ce_str)
    f.set_centroid_mzs(mzs)
    f.set_centroid_ints(ints)
    f.collision = ce_str
    f.save()