Example #1
# Assumed imports for this snippet: sys, pyopenms and the Bruker timsdata
# Python wrapper; store_frame() is defined elsewhere in the same script.
import sys

import pyopenms
import timsdata


def main():

    if len(sys.argv) < 3:
        raise RuntimeError("need arguments: tdf_directory output.mzML")

    analysis_dir = sys.argv[1]
    output_fname = sys.argv[2]

    if sys.version_info.major == 2:
        analysis_dir = unicode(analysis_dir)

    # Open the Bruker .d analysis directory; td.conn is a sqlite3 connection
    # to the analysis.tdf metadata database
    td = timsdata.TimsData(analysis_dir)
    conn = td.conn

    # Get total frame count:
    q = conn.execute("SELECT COUNT(*) FROM Frames")
    row = q.fetchone()
    N = row[0]
    print("Analysis has {0} frames.".format(N))

    # Store output
    if output_fname.lower().endswith("mzml"):
        consumer = pyopenms.PlainMSDataWritingConsumer(output_fname)

        # Compress output
        try:
            opt = consumer.getOptions()
            cfg = pyopenms.NumpressConfig()
            cfg.estimate_fixed_point = True
            cfg.numpressErrorTolerance = -1.0 # skip check, faster
            cfg.setCompression("linear");
            cfg.linear_fp_mass_acc = -1; # set the desired RT accuracy in seconds
            opt.setNumpressConfigurationMassTime(cfg)
            cfg = pyopenms.NumpressConfig()
            cfg = pyopenms.NumpressConfig()
            cfg.estimate_fixed_point = True
            cfg.numpressErrorTolerance = -1.0 # skip check, faster
            cfg.setCompression("slof");
            opt.setNumpressConfigurationIntensity(cfg)
            opt.setCompression(True) # zlib compression
            consumer.setOptions(opt)
        except Exception:
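            # Numpress/zlib setup is optional; if it fails (e.g. an older
            # pyopenms), fall back to the consumer's default output options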
            pass

    if output_fname.lower().endswith("sqmass"):
        consumer = pyopenms.MSDataSqlConsumer(output_fname)

    # Frame IDs in the TDF Frames table are 1-based, hence frame_id + 1
    for frame_id in range(N):
        store_frame(frame_id+1, td, conn, consumer, compressFrame=True)
Example #2
        e.addSpectrum(s)

        if verbose:
            print("scan ", k)
        if len(mz) > 0 and verbose:
            print("  len: ", len(mz), len(intens))
            print("  at ook0: ", ook0_axis[k])
            print("  mz/intensity arrays: ", mz, intens)
            for p in s:
                print(p.getMZ(), p.getIntensity())

    # Store file at designated position
    pyopenms.MzMLFile().store(filename, e)


td = timsdata.TimsData(analysis_dir)
conn = td.conn

# Get total frame count:
q = conn.execute("SELECT COUNT(*) FROM Frames")
row = q.fetchone()
N = row[0]
print("Analysis has {0} frames.".format(N))

# For testing
## store_frame(10, "/tmp/test2.mzML")

for frame_id in range(N):
    store_frame(frame_id+1, "/tmp/test_%s.mzML" % (frame_id+1), q)

Example #3
def write_mzml(args):
    """
    Write the mzML file.

    Read the command line arguments and use the psims package to format and
    write the mzML output.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments.

    Returns
    -------
    None
    """
    mzml_data_struct = process_arg(args)

    ### Connect to TDF DB
    logging.info("transforming TDF to mzML file: {}".format(
        mzml_data_struct['input']))

    mzml_data_struct['td'] = timsdata.TimsData(mzml_data_struct['input'])

    ### DIA methods have no Precursors table, so check whether it exists
    precursor_table_exists = mzml_data_struct['td'].conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='Precursors'"
    ).fetchone() is not None

    mzml_data_struct['data_dict'] = get_spectrum_dict(mzml_data_struct,
                                                      precursor_table_exists)

    logging.info("{} Total Frames.".format(
        mzml_data_struct['data_dict']['frame_count']))
    logging.info("{} Total Spectra.".format(
        mzml_data_struct['data_dict']['total_spectra']))
    logging.info("{} MS1 Frames.".format(
        mzml_data_struct['data_dict']['ms1_spectra_count']))
    logging.info("{} MS2 Merged Scans.".format(
        mzml_data_struct['data_dict']['ms2_spectra_count']))

    logging.info("writting to mzML file: {}".format(
        mzml_data_struct['output']))
    mzml_data_struct['writer'] = MzMLWriter(mzml_data_struct['output'])
    mzml_data_struct['writer'].begin()
    write_header(mzml_data_struct)

    # Get the number of spectra in the specified frame range
    total_spectra_count = get_num_spectra(mzml_data_struct,
                                          precursor_table_exists)
    logging.info("Processing {} Spectra.".format(total_spectra_count))
    logging.info("Reading, Merging and Formating Frames for mzML")

    with mzml_data_struct['writer'].run(
            id=1,
            instrument_configuration='IC1',
            start_time=mzml_data_struct['data_dict']['acq_date_time']):
        with mzml_data_struct['writer'].spectrum_list(
                count=total_spectra_count):
            # Process Frames
            mzml_data_struct['scan_loop_time1'] = time.time()
            mzml_data_struct['scan_index'] = 1

            mzml_data_struct['precursor_frames'] = mzml_data_struct[
                'td'].conn.execute(
                    "SELECT * From Frames where MsMsType=0").fetchall()
            # Clamp the upper frame bound to the total number of frames
            frame_count = mzml_data_struct['data_dict']['frame_count']
            if (mzml_data_struct['end_frame'] == -1
                    or mzml_data_struct['end_frame'] > frame_count):
                mzml_data_struct['end_frame'] = frame_count

            for precursor_frame in mzml_data_struct['precursor_frames']:
                # Get the precursor frame id and start time (converted to minutes)
                current_precursor = {
                    'id': precursor_frame[0],
                    'start_time': precursor_frame[1] / 60,
                }
                mzml_data_struct['current_precursor'] = current_precursor

                # Skip frames outside the requested frame range
                if (current_precursor['id'] < mzml_data_struct['start_frame']
                        or current_precursor['id'] > mzml_data_struct['end_frame']):
                    continue

                write_precursor_frame(mzml_data_struct)

                logging.debug(mzml_data_struct['scan_index'])
                scan_progress(mzml_data_struct)

                if precursor_table_exists:
                    for precursor_data in get_precursor_list(mzml_data_struct):
                        mzml_data_struct['current_precursor'][
                            'data'] = precursor_data
                        write_pasef_msms_spectrum(mzml_data_struct)

                        scan_progress(mzml_data_struct)

    logging.info("Writing final mzML")
    mzml_data_struct['writer'].end()

    return
Example #4
def run_timstof_conversion(input, output=''):
    global place_high
    global precursor_counter
    analysis_dir = input

    td = timsdata.TimsData(analysis_dir)
    conn = td.conn

    # create a database connection
    #conn = create_connection(analysis_dir)
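    # precursor_map: parent MS1 frame id -> list of rows from the Precursors table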
    precursor_map = {}

    with conn:
        # print("2. Query all tasks")
        msms_data = select_all_PasefFrameMsMsInfo(conn)
        # print msms_data[0:5]
        all_frame = select_all_Frames(conn)
        # print all_frame[0:5]
        precursor_list = select_all_Precursors(conn)
        for row in precursor_list:
            parent_id = int(row[-1])
            if parent_id not in precursor_map:
                precursor_map[parent_id] = []
            precursor_map[parent_id].append(row)

    all_ms1_frames = [a for a in all_frame if a[4] == '0']

    frame_id_ms1_scan_map, ms2_scan_map = build_frame_id_ms1_scan_map(
        precursor_map, all_ms1_frames)
    #offset_map = build_offset_map(precursor_map, all_ms1_frames)

    precursor_array = np.array(
        precursor_list
    )  # 'ID', 'LargestPeakMz', 'AverageMz', 'MonoisotopicMz', 'Charge', 'ScanNumber', 'Intensity', 'Parent'
    # frame_parent_dict = msms_frame_parent_dict(all_frame)
    # parent_ms2_scan_map = build_frame_id_last_ms2_scan_map(precursor_list)

    parent_frame_array = np.array(precursor_array[:, 7])
    frame_index_list = []
    last_val = 0
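    # Record the index at which each new parent frame's block of precursors starts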
    for idx, val in enumerate(parent_frame_array):
        if val != last_val:
            frame_index_list.append(idx)
        last_val = val


    #    frame_index_list.append(idx + 1)
    #    frame_start_end_dict = {}

    # for idx, val in enumerate(frame_index_list[:-1]):
    #     frame_start_end_dict[parent_frame_array[val]] = (frame_index_list[idx], frame_index_list[idx + 1])

    ms2_header = 'H\tExtractor\tTimsTOF_extractor\n' \
                 'H\tExtractorVersion\t{}\n' \
                 'H\tPublicationDate\t20-02-2020\n' \
                 'H\tComments\tTimsTOF_extractor written by Yu Gao, 2018\n' \
                 'H\tComments\tTimsTOF_extractor modified by Titus Jung, 2019\n' \
                 'H\tExtractorOptions\tMSn\n' \
                 'H\tAcquisitionMethod\tData-Dependent\n' \
                 'H\tInstrumentType\tTIMSTOF\n' \
                 'H\tDataType\tCentroid\n' \
                 'H\tScanType\tMS2\n' \
                 'H\tResolution\n' \
                 'H\tIsolationWindow\n' \
                 'H\tFirstScan\t1\n' \
                 'H\tLastScan\t{}\n' \
                 'H\tMonoIsotopic PrecMz\tTrue\n'.format(version, len(msms_data))

    ms1_header = 'H\tExtractor\tTimsTOF_extractor\n' \
                 'H\tExtractorVersion\t{}\n' \
                 'H\tPublicationDate\t20-02-2020\n' \
                 'H\tComments\tTimsTOF_extractor written by Yu Gao, 2018\n' \
                 'H\tComments\tTimsTOF_extractor modified by Titus Jung, 2019\n' \
                 'H\tExtractorOptions\tMSn\n' \
                 'H\tAcquisitionMethod\tData-Dependent\n' \
                 'H\tInstrumentType\tTIMSTOF\n' \
                 'H\tScanType\tMS1\n'.format(version)

    ms2_file_name = os.path.basename(analysis_dir).split('.')[0] + '_nopd.ms2'
    ms1_file_name = os.path.basename(analysis_dir).split('.')[0] + '_nopd.ms1'
    ms1_scan_set = set()
    if len(output) > 0:
        ms2_file_name = output
        ms1_file_name = output.replace('.ms2', '.ms1')
    #else:
    # os.chdir(sys.argv[2])
    if convert_ms2:
        with open(ms2_file_name, 'w') as output_file:
            output_file.write(ms2_header)
            progress = 0
            for row in precursor_list:
                prc_id, largest_peak_mz, average_mz, monoisotopic_mz, cs, scan_number, intensity, parent = row
                prc_id_int = int(prc_id)
                if monoisotopic_mz is not None and cs is not None:
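                    # m/z * z - (z - 1) * proton mass gives the singly protonated
                    # (M+H)+ mass written on the MS2 'Z' line below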
                    prc_mass_mz = float(monoisotopic_mz)
                    prc_mass = (prc_mass_mz * cs) - (cs - 1) * 1.007276466

                    mz_int_arr = td.readPasefMsMs([prc_id_int])
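                    # readPasefMsMs returns {precursor_id: (mz_array, intensity_array)}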
                    parent_index = int(parent)
                    scan_id = ms2_scan_map[parent_index][prc_id_int]
                    rt_time = float(all_frame[parent_index][1])
                    k0 = td.scanNumToOneOverK0(parent_index, [scan_number])
                    mz_arr = mz_int_arr[prc_id_int][0]
                    if len(mz_arr) > 0:
                        output_file.write(
                            "S\t{0:06d}\t{1:06d}\t{2:.4f}\n".format(
                                scan_id, scan_id, prc_mass_mz))
                        output_file.write(
                            "I\tTIMSTOF_Parent_ID\t{}\n".format(parent))
                        output_file.write(
                            "I\tTIMSTOF_Precursor_ID\t{}\n".format(prc_id))
                        output_file.write(
                            "I\tRetTime\t{0:.4f}\n".format(rt_time))
                        output_file.write("I\tIon Mobility\t{0:.4f}\n".format(
                            k0[0]))
                        output_file.write("Z\t{1}\t{0:.4f}\n".format(
                            prc_mass, cs))

                        int_arr = mz_int_arr[prc_id_int][1]
                        for j in range(0, len(mz_arr)):
                            output_file.write("%.4f %.1f \n" %
                                              (mz_arr[j], int_arr[j]))

                    progress += 1
                    if progress % 5000 == 0:
                        print(
                            "progress ms2: %.1f%%" %
                            (float(progress) / len(precursor_list) * 100),
                            time.process_time() - start_time)
    if convert_ms1:
        with open(ms1_file_name, 'w') as output_file:
            output_file.write(ms1_header)
            progress = 0
            prev_id = 0
            #scan_set = set()
            prev_scan = 0
            precursor_counter = 0
            lines = []
            for i, frame in enumerate(all_ms1_frames):
                id = int(frame[0])
                num_scans = int(frame[8])

                index_intensity_arr = td.readScans(id, 0, num_scans)
                index_intensity_carr = np.concatenate(index_intensity_arr,
                                                      axis=1)
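                # Map every peak in the flattened arrays back to the TIMS scan
                # (mobility index) it came from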
                mobility_index = [
                    i for i, row in enumerate(index_intensity_arr)
                    for j in range(len(row[0]))
                ]

                mass_array = td.indexToMz(id, index_intensity_carr[0])
                one_over_k0 = td.scanNumToOneOverK0(id, mobility_index)
                voltage = td.scanNumToVoltage(id, mobility_index)
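                # Assemble per-peak rows of (m/z, intensity, 1/K0, voltage),
                # round to 4 decimals and sort by m/z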
                temp = np.array(
                    list(
                        zip(mass_array, index_intensity_carr[1], one_over_k0,
                            voltage)))
                mass_intensity = np.around(temp, decimals=4)
                sorted_mass_intensity = mass_intensity[
                    mass_intensity[:, 0].argsort()]
                scan_num = frame_id_ms1_scan_map[id]
                if len(sorted_mass_intensity) > 0:
                    rt_time = 0 if i == 0 else all_ms1_frames[i - 1][1]
                    lines.append("S\t%06d\t%06d\n" % (scan_num, scan_num))
                    lines.append("I\tTIMSTOF_Frame_id\t{}\n".format(id))
                    lines.append("I\tRetTime\t%.2f\n" % float(rt_time))
                    for row in sorted_mass_intensity:
                        x_str = "%.4f %.1f %.4f \n" % (row[0], row[1], row[-2])
                        lines.append(x_str)
                # output_file.write("S\t%06d\t%06d\n" % (scan_num, scan_num))
                # output_file.write("I\tTIMSTOF_Frame_id\t{}\n".format(id))
                # output_file.write("I\tRetTime\t%.2f\n" % float(rt_time))
                # output_file.writelines("%.4f %.1f %.4f\n" % (row[0], row[1],
                # row[-1]) for row in sorted_mass_intensity)
                if len(lines) > 1_000_000:
                    output_file.writelines(lines)
                    lines = []

                progress += 1
                if progress % 5000 == 0:
                    print(
                        "progress ms1 %.1f%%" %
                        (float(progress) / len(all_ms1_frames) * 100),
                        time.process_time() - start_time)
            output_file.writelines(lines)
            lines = []
    conn.close()