Exemplo n.º 1
0
def main():
    """Convert a TDF analysis directory into an mzML or sqMass file.

    Reads two positional command-line arguments from ``sys.argv``: the
    analysis directory opened with ``timsdata.TimsData`` and the output
    file name. The output format is selected from the (case-insensitive)
    suffix of the output file name: ``mzml`` or ``sqmass``.

    Every frame reported by the ``Frames`` table is passed to
    ``store_frame`` together with the chosen consumer.

    Raises:
        RuntimeError: if fewer than two arguments were supplied.
        ValueError: if the output file name has an unsupported suffix.
    """
    if len(sys.argv) < 3:
        raise RuntimeError("need arguments: tdf_directory output.mzML")

    analysis_dir = sys.argv[1]
    output_fname = sys.argv[2]

    # Validate the output name before opening any data. Without this check
    # an unsupported suffix leaves `consumer` unbound and the frame loop
    # fails with an unrelated-looking UnboundLocalError.
    output_lower = output_fname.lower()
    if not output_lower.endswith(("mzml", "sqmass")):
        raise ValueError(
            "unsupported output file '{0}': expected a name ending in "
            "mzML or sqMass".format(output_fname))

    if sys.version_info.major == 2:
        # Only reached on Python 2, where `unicode` is a builtin.
        analysis_dir = unicode(analysis_dir)  # noqa: F821

    td = timsdata.TimsData(analysis_dir)
    conn = td.conn

    # Get total frame count:
    N = conn.execute("SELECT COUNT(*) FROM Frames").fetchone()[0]
    print("Analysis has {0} frames.".format(N))

    # Store output
    if output_lower.endswith("mzml"):
        consumer = pyopenms.PlainMSDataWritingConsumer(output_fname)

        # Compression is best-effort: if the installed pyOpenMS rejects any
        # of these settings, keep the consumer's default options instead of
        # aborting the conversion.
        try:
            opt = consumer.getOptions()

            # Numpress "linear" encoding for the mass/time arrays.
            cfg = pyopenms.NumpressConfig()
            cfg.estimate_fixed_point = True
            cfg.numpressErrorTolerance = -1.0  # skip check, faster
            cfg.setCompression("linear")
            # NOTE(review): -1 presumably means "no explicit mass accuracy
            # target" -- confirm against the pyOpenMS NumpressConfig docs.
            cfg.linear_fp_mass_acc = -1
            opt.setNumpressConfigurationMassTime(cfg)

            # Numpress "slof" encoding for the intensity arrays.
            cfg = pyopenms.NumpressConfig()
            cfg.estimate_fixed_point = True
            cfg.numpressErrorTolerance = -1.0  # skip check, faster
            cfg.setCompression("slof")
            opt.setNumpressConfigurationIntensity(cfg)

            opt.setCompression(True)  # zlib compression
            consumer.setOptions(opt)
        except Exception as e:
            print(e)
            print("Warning: could not configure mzML compression; "
                  "writing with the consumer's default options")
    else:
        # The suffix was validated above, so this is the sqMass case.
        consumer = pyopenms.MSDataSqlConsumer(output_fname)

    # store_frame is called with 1-based frame ids.
    for frame_id in range(1, N + 1):
        store_frame(frame_id, td, conn, consumer, compressFrame=True)
Exemplo n.º 2
0
def get_consumer(output_fname):
    """Return the pyOpenMS consumer that matches the output file name.

    The format is picked from the case-insensitive suffix of
    ``output_fname``: names ending in ``sqmass`` get an
    ``MSDataSqlConsumer``; names ending in ``mzml`` get a
    ``PlainMSDataWritingConsumer`` on which compression options are
    applied when possible.

    Raises:
        Exception: if the file name ends in neither ``mzml`` nor ``sqmass``.
    """
    lowered = output_fname.lower()

    if lowered.endswith("sqmass"):
        return pyopenms.MSDataSqlConsumer(output_fname)

    if not lowered.endswith("mzml"):
        raise Exception("Supported filenames: mzML and sqMass.")

    writer = pyopenms.PlainMSDataWritingConsumer(output_fname)

    # Compression is optional: on failure, report it and continue with the
    # writer's existing options.
    try:
        options = writer.getOptions()
        diapysef.util.setCompressionOptions(options)
        writer.setOptions(options)
    except Exception as err:
        print(err)
        print(
            "Your version of pyOpenMS does not support any compression, your files may get rather large"
        )

    return writer
Exemplo n.º 3
0
class FilteringConsumer:
    """
    Consumer wrapper that forwards calls to an internal consumer.

    Settings and expected sizes are always passed through. Spectra and
    chromatograms are passed through only when their native ID contains
    the module-level ``filter_string``.
    """
    def __init__(self, consumer):
        # The wrapped consumer that receives everything passing the filter.
        self._internal_consumer = consumer

    def _passes_filter(self, item):
        # True when the item's native ID contains the global filter string.
        return filter_string in item.getNativeID()

    def setExperimentalSettings(self, s):
        self._internal_consumer.setExperimentalSettings(s)

    def setExpectedSize(self, a, b):
        self._internal_consumer.setExpectedSize(a, b)

    def consumeChromatogram(self, c):
        if not self._passes_filter(c):
            return
        self._internal_consumer.consumeChromatogram(c)

    def consumeSpectrum(self, s):
        if not self._passes_filter(s):
            return
        self._internal_consumer.consumeSpectrum(s)


###################################
# Do the actual work
###################################

# Wrap the plain mzML writer for `outfile` in the filter, then let MzMLFile
# feed `infile` through the wrapped consumer.
consumer = FilteringConsumer(pyopenms.PlainMSDataWritingConsumer(outfile))

pyopenms.MzMLFile().transform(infile, consumer)
Exemplo n.º 4
0
def writeCombinedMzML(peptides, outputFile, templateFile, mzDelta,
                      minIntensity):
    """Write the template spectra to ``outputFile`` with synthetic peaks merged in.

    Each spectrum yielded by ``MZMLtoSpectrum(templateFile)`` is combined
    with the synthetic data of every peptide whose retention-time window
    contains the spectrum and that has a synthetic RT exactly equal to the
    spectrum RT. For such a peptide a "light" peak set is merged at the
    peptide's own m/z range and a "heavy" peak set at that range shifted by
    ``mzDelta``. The resulting spectrum is written immediately through a
    ``pyopenms.PlainMSDataWritingConsumer``.

    Args:
        peptides: iterable of peptide objects. Reads ``RTmin``, ``RTmax``,
            ``synthRTs``, ``synthMZs``, ``synthINTs``, ``newmz``, ``MZmin``
            and ``MZmax``; updates ``lightsignal``, ``heavysignal`` and
            (inside ``replaceData``) ``write``.
        outputFile: file name handed to the writing consumer.
        templateFile: file name handed to ``MZMLtoSpectrum``.
        mzDelta: m/z offset applied for the heavy peak set.
        minIntensity: forwarded to ``replaceData``, which currently does
            not use it.

    Returns:
        The ``peptides`` argument, after its signal attributes were updated.
    """
    import copy
    import pyopenms
    print('Writing combined mzML file')

    # generator for template file spectra
    print('Opening template file')
    spectra = MZMLtoSpectrum(templateFile)

    # can't use usual writer - stores all data in memory before
    # writing to mzML at once ---> too memory intensive

    # use consumer to write spectra on the fly
    consumer = pyopenms.PlainMSDataWritingConsumer(outputFile)

    def replaceData(targetmz, mzs, ints, MZmin, MZmax, synthMZs, synthINTs,
                    minIntensity, peptide):
        """Merge synthetic peaks into the experimental arrays.

        Walks up to 20 windows spaced 0.5 m/z apart starting at ``MZmin``.
        In each window that contains synthetic points, the experimental
        points lying strictly between the window's smallest and largest
        synthetic m/z are added onto the nearest synthetic intensity and
        removed from the experimental arrays; the synthetic arrays are
        then appended.

        ``synthINTs`` is incremented in place, so the caller's array is
        modified. ``targetmz`` and ``minIntensity`` are accepted but unused.

        Returns:
            ``(mzs, ints, maxSignal)``: the merged arrays sorted by m/z and
            the largest per-window synthetic intensity seen (0 if none).
        """
        # check some data is still present
        if synthMZs.shape[0] < 1:
            # Callers unpack three values, so the empty case must return a
            # signal too (0 == "no synthetic signal").
            return mzs, ints, 0

        maxSignal = 0

        # section data into isotopes
        for step in range(20):
            offset = step / 2.0
            isotopeMin = MZmin + offset - 0.05
            isotopeMax = MZmin + offset + 0.3

            if isotopeMin > MZmax:
                break

            # get synthetic data chunks
            inWindow = np.where((synthMZs >= isotopeMin)
                                & (synthMZs < isotopeMax))

            isotopemzs = synthMZs[inWindow]
            # Snapshot taken before the increments below, so `signal`
            # reflects the pure synthetic intensity.
            isotopeints = synthINTs[inWindow]

            if isotopemzs.shape[0] < 1:
                peptide.write = False
                continue

            # get experimental data within isotope boundaries
            minIsotope = np.min(isotopemzs)
            maxIsotope = np.max(isotopemzs)

            overlap = np.where((mzs > minIsotope) & (mzs < maxIsotope))

            # match exptl data to nearest synth mz
            # and increment corresponding synth int
            for eMz, eInt in zip(mzs[overlap], ints[overlap]):
                index = np.argmin(np.absolute(synthMZs - eMz))
                synthINTs[index] += eInt

            signal = np.max(isotopeints)

            if signal > maxSignal:
                maxSignal = signal

            mzs = np.delete(mzs, overlap)
            ints = np.delete(ints, overlap)

            # NOTE(review): the complete synthetic arrays are appended on
            # every window iteration, so later windows see the points added
            # by earlier ones. This may have been meant to run once after
            # the loop -- confirm before changing; behaviour kept as-is.
            mzs = np.concatenate((mzs, synthMZs))
            ints = np.concatenate((ints, synthINTs))

        sortmap = np.argsort(mzs)
        mzs = mzs[sortmap]
        ints = ints[sortmap]

        return mzs, ints, maxSignal

    print('writing spectra')
    for i, s in enumerate(spectra):

        if i % 100 == 0:
            print('Processing spectrum: %s, RT: %.2f' % (i, s.rt / 60))

        mzs = s.mzs
        ints = s.ints

        for p in peptides:
            if s.rt > p.RTmax or s.rt < p.RTmin:
                continue

            for rti, rt in enumerate(p.synthRTs):
                if rt == s.rt:  # be careful here: exact equality on RT values

                    synthMZs = p.synthMZs
                    synthINTs1 = p.synthINTs[rti]
                    # Independent copy for the heavy peak, taken before the
                    # light pass increments synthINTs1 in place.
                    synthINTs2 = copy.deepcopy(synthINTs1)

                    # add light peak
                    mzs, ints, signal = replaceData(p.newmz, mzs, ints,
                                                    p.MZmin, p.MZmax, synthMZs,
                                                    synthINTs1, minIntensity,
                                                    p)

                    if signal > p.lightsignal:
                        p.lightsignal = signal

                    # add heavy peak
                    mzs, ints, signal = replaceData(p.newmz + mzDelta, mzs,
                                                    ints, p.MZmin + mzDelta,
                                                    p.MZmax + mzDelta,
                                                    synthMZs + mzDelta,
                                                    synthINTs2, minIntensity,
                                                    p)

                    if signal > p.heavysignal:
                        p.heavysignal = signal

        new_spectrum = copy.deepcopy(s.original)
        new_spectrum.set_peaks((mzs, ints))

        # use consumer to write new spectrum
        consumer.consumeSpectrum(new_spectrum)

    print('done')
    return peptides