Exemplo n.º 1
0
    def test_main(self, peak, im_i):
        """
        Check ``peak_sum_area`` with and without per-ion output.

        With ``single_ion=True`` the call is expected to give a
        ``(total, per-ion mapping)`` pair; with ``single_ion=False`` only the
        total. Both totals must equal the same reference value.
        """
        expected_total = 10025814.0

        # single_ion=True: unpack the total and the per-ion areas
        result = peak_sum_area(im_i, peak, single_ion=True, max_bound=5)
        total, per_ion = result
        assert isinstance(total, float)
        assert isinstance(per_ion, dict)
        assert total == expected_total
        assert per_ion[51] == 3299.0
        assert isinstance(per_ion[51], float)

        # single_ion=False: the bare total
        total_only = peak_sum_area(im_i, peak, single_ion=False, max_bound=5)
        assert total_only == expected_total
Exemplo n.º 2
0
def Peak_detector(pp_im):
    """
    Detect, filter and integrate peaks for each item of ``pp_im``.

    Each item is passed to ``BillerBiemann``; the result is trimmed with
    ``rel_threshold`` and ``num_ions_threshold``, and every surviving peak
    gets its area set from ``peak_sum_area``. Progress is printed per item,
    followed by the number of peaks kept for each item.

    :param pp_im: iterable of objects accepted by ``BillerBiemann`` and
        ``peak_sum_area`` (presumably intensity matrices -- confirm).
    :return: ``None``; the visible code collects results but does not
        return them.
    """

    peakz = []
    UID_list = []

    for counter, im in enumerate(pp_im, start=1):

        poss_peaks = BillerBiemann(im, points=9, scans=2)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=5, cutoff=10000)
        for peak in nin:
            area = peak_sum_area(im, peak)
            peak.set_area(area)

        peakz.append(nin)
        print("...", counter)

    # The loop header must end at the colon: a stray token after it followed
    # by an indented body is a syntax error.
    for pkz in peakz:
        print("Peaks detected: ", len(pkz))
        # NOTE(review): ``pkz`` is the whole filtered collection returned by
        # num_ions_threshold, not a single peak -- confirm that it really
        # exposes get_UID(); a per-peak call may have been intended.
        uid = pkz.get_UID()
        UID_list.append(uid)
Exemplo n.º 3
0
def expr_list(pyms_datadir):
    """
    Yield a list of ``Experiment`` objects reloaded from freshly dumped files.

    For every code in ``eley_codes`` the matching ``.JDX`` file under
    ``pyms_datadir`` is read, each ion chromatogram is smoothed and
    baseline-corrected, peaks are detected and filtered, and the resulting
    experiment is dumped into a temporary directory. The dumps are then
    loaded back and the loaded experiments are yielded as one list; the
    temporary directory lives until the generator is resumed.

    :param pyms_datadir: directory object supporting ``/`` that holds the
        ``.JDX`` input files.
    """
    with tempfile.TemporaryDirectory() as scratch:
        dump_dir = pathlib.Path(scratch)

        # Stage 1: build and dump one experiment per code
        for code in eley_codes:
            raw_data = JCAMP_reader(pyms_datadir / f"{code}.JDX")
            im = build_intensity_matrix_i(raw_data)

            # im.size is (scans, masses); only the mass count is used
            _, mass_count = im.size

            # noise filter and baseline correct, one ion chromatogram at a time
            for mass_idx in range(mass_count):
                smoothed = savitzky_golay(im.get_ic_at_index(mass_idx))
                im.set_ic_at_index(mass_idx, tophat(smoothed, struct="1.5m"))

            detected = BillerBiemann(im, points=9, scans=2)

            print('#')
            kept = num_ions_threshold(rel_threshold(detected, 2), 3, 3000)
            print('#')

            for peak in kept:
                # restrict the mass range and ignore the TMS ions
                peak.crop_mass(50, 400)
                for tms_mass in (73, 147):
                    peak.null_mass(tms_mass)

                # total area, then the top-ion areas
                peak.area = peak_sum_area(im, peak)
                peak.ion_areas = peak_top_ion_areas(im, peak)

            experiment = Experiment(code, kept)

            # same time range for all experiments
            experiment.sele_rt_range(["6.5m", "21m"])

            print('#')
            experiment.dump(dump_dir / f"{code}.expr")
            print('#')

        # Stage 2: load the dumps back
        loaded = []
        for code in eley_codes:
            restored = load_expr(dump_dir / f"{code}.expr")
            assert isinstance(restored, Experiment)
            loaded.append(restored)

        yield loaded
Exemplo n.º 4
0
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window,
                                       degree=4)  #JT: changed to 4 from 2
            #print "savitky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."
        # noise level calc
        tic1 = savitzky_golay(tic)
        tic2 = tophat(tic1, struct="1.5m")  #JT: How does struct size work?
        noise_level = window_analyzer(tic2)
        print "Noise level in TIC: ", noise_level

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)

    # filter down the peaks.
    #   - First: remove any masses from each peak that have intensity less than r percent of the max intensity in that peak
    #   - Second: remove any peak where there are less than n ions with intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(
        pl2, n=args.minions, cutoff=100000
    )  #100000 for pegBT  #200 for peg3 #minions maybe 3 instead of 4?

    #JT: Was getting very different noise cutoff values so just made it 10^5
    # Which was decided on by looking at chromatograms to find baseline noise lvl
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        #peak.null_mass(73)
        #peak.null_mass(207)     # column bleed
        #peak.null_mass(84)      # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(
            im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
Exemplo n.º 5
0
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=2)
            #print "savitky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."
        # noise level calc
        tic1 = savitzky_golay(tic)
        tic2 = tophat(tic1, struct="1.5m")
        noise_level = window_analyzer(tic2)
        print "Noise level in TIC: ", noise_level


    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)


    # filter down the peaks.
    #   - First: remove any masses from each peak that have intensity less than r percent of the max intensity in that peak
    #   - Second: remove any peak where there are less than n ions with intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=noise_level * args.noisemult)
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        # peak.null_mass(73)
        peak.null_mass(207)     # column bleed
        peak.null_mass(84)      # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
Exemplo n.º 6
0
def test_area(im_i, peak):
    """
    Check the ``area`` attribute of a peak.

    A float area computed by ``peak_sum_area`` must round-trip through the
    attribute; assigning a non-numeric value must raise ``TypeError`` and
    assigning ``-1`` must raise ``ValueError``.
    """
    # work on a copy so the shared fixture is not modified
    peak = copy.deepcopy(peak)

    computed = peak_sum_area(im_i, peak)
    assert isinstance(computed, float)

    peak.area = computed
    assert peak.area == computed
    assert isinstance(peak.area, float)

    # mass spectrum at 31.17 minutes, used to build throwaway peaks
    apex_scan = im_i.get_index_at_time(31.17 * 60.0)
    spectrum = im_i.get_ms_at_index(apex_scan)

    wrong_types = (test_string, test_dict, test_list_strs, test_list_ints)
    for bad_value in wrong_types:
        with pytest.raises(TypeError):
            Peak(test_float, spectrum).area = bad_value

    with pytest.raises(ValueError):
        Peak(test_float, spectrum).area = -1
Exemplo n.º 7
0
def _filtered_peak_list(im_i, _peak_list):
    """
    Return a filtered, integrated copy of ``_peak_list``.

    The input list is deep-copied, so the filters can run with
    ``copy_peaks=False`` without touching the caller's peaks. Each surviving
    peak is cropped to masses 50-400, has masses 73 and 147 nulled, and gets
    its ``area`` and ``ion_areas`` set from ``im_i``.
    """
    working = copy.deepcopy(_peak_list)

    # trim by relative intensity, then by ion-count threshold
    by_intensity = rel_threshold(working, 2, copy_peaks=False)
    kept = num_ions_threshold(by_intensity, 3, 3000, copy_peaks=False)

    for peak in kept:
        # set the mass range and ignore the TMS ions
        peak.crop_mass(50, 400)
        for tms_mass in (73, 147):
            peak.null_mass(tms_mass)

        # total area, then the top-ion areas
        peak.area = peak_sum_area(im_i, peak)
        peak.ion_areas = peak_top_ion_areas(im_i, peak)

    return kept
Exemplo n.º 8
0
def Peak_detector(pp_im):
    """
    Detect, filter and integrate peaks for each item of ``pp_im``.

    Each item goes through ``BillerBiemann``, ``rel_threshold`` and
    ``num_ions_threshold``; every surviving peak gets its area set. A
    progress line is printed per item, then the number of peaks kept for
    each item.

    :param pp_im: iterable of objects accepted by ``BillerBiemann``.
    :return: list with one filtered peak collection per input item.
    """
    per_matrix = []

    for position, matrix in enumerate(list(pp_im), start=1):
        candidates = BillerBiemann(matrix, points=9, scans=2)
        trimmed = rel_threshold(candidates, percent=2)
        kept = num_ions_threshold(trimmed, n=5, cutoff=10000)

        for peak in kept:
            peak.set_area(peak_sum_area(matrix, peak))

        per_matrix.append(kept)
        print("...", position)

    for kept in per_matrix:
        print("Peaks detected: ", len(kept))

    return per_matrix
Exemplo n.º 9
0
 def test_peak_errors(self, im_i, obj):
     """``peak_sum_area`` must raise ``TypeError`` when ``obj`` is passed as the peak."""
     expecting_type_error = pytest.raises(TypeError)
     with expecting_type_error:
         peak_sum_area(im_i, obj)
Exemplo n.º 10
0
 def test_im_errors(self, peak, obj):
     """``peak_sum_area`` must raise ``TypeError`` when ``obj`` is passed as the intensity matrix."""
     expecting_type_error = pytest.raises(TypeError)
     with expecting_type_error:
         peak_sum_area(obj, peak)
Exemplo n.º 11
0
def Peak_detector(pp_im, noise, name, points, scans, percent, ni, name_tag,
                  sdir):
    """
    Detect and filter peaks per sample and write each sample's peaks to CSV.

    ``pp_im``, ``noise`` and ``name`` are walked in lockstep. For each triple
    the peaks found by ``BillerBiemann`` are trimmed with ``rel_threshold``
    and ``num_ions_threshold`` (using that sample's noise value as cutoff),
    each peak's area is set, and one CSV row per peak is written: area,
    retention time, then the peak's mass-spectrum intensities as floats.

    :param pp_im: iterable of objects accepted by ``BillerBiemann``; it may
        be a one-shot iterator.
    :param noise: sized sequence of per-sample cutoffs for ``num_ions_threshold``.
    :param name: sized sequence of per-sample name strings used in file names.
    :param points: ``points`` argument for ``BillerBiemann``.
    :param scans: ``scans`` argument for ``BillerBiemann``.
    :param percent: ``percent`` argument for ``rel_threshold``.
    :param ni: ``n`` argument for ``num_ions_threshold``.
    :param name_tag: string inserted between the sample name and
        ``"ms_data.csv"`` in each output file name.
    :param sdir: directory the CSV files are written to.
    :return: ``[peakz, ms_data_files]`` -- the filtered peak collections and
        the paths of the CSV files written, both in input order.
    """
    # Materialise once: the length report below would otherwise exhaust a
    # one-shot iterator and leave nothing for the main loop.
    pp_im = list(pp_im)
    peakz = []
    ms_data_files = []

    print("len pp_im", len(pp_im))
    print("len noise", len(noise))
    print("len name", len(name), name)

    # Header row: two labels followed by the floats 35.0 .. 220.0
    # (presumably the m/z bins of the spectra -- confirm against the data).
    head = ['Area', 'RTs'] + [float(mz) for mz in range(35, 221)]

    # Built-in zip() rather than itertools.izip(), which Python 3 lacks.
    for im, n, na in zip(pp_im, noise, name):
        poss_peaks = BillerBiemann(im, points=points,
                                   scans=scans)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=percent)
        nin = num_ions_threshold(pi, n=ni, cutoff=n)

        completeName = os.path.join(sdir, na + name_tag + "ms_data.csv")
        # The with-block closes the file; no explicit close() is needed.
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            w.writerow(head)
            for peak in nin:
                area = peak_sum_area(im, peak)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                p_rt = peak.get_rt()
                its = [float(i) for i in ms.mass_spec]
                w.writerow([area, p_rt] + its)

        peakz.append(nin)
        ms_data_files.append(completeName)
    print('ms_data_files:', ms_data_files)

    return [peakz, ms_data_files]
Exemplo n.º 12
0
def Peak_detector(pp_im, noise, name,
                  savePath='/home/juicebox/utils/easyGC/MS_peak_data'):
    """
    Detect and filter peaks per sample and write each sample's spectra to CSV.

    ``pp_im``, ``noise`` and ``name`` are walked in lockstep. For each triple
    the peaks found by ``BillerBiemann`` are trimmed with ``rel_threshold``
    and ``num_ions_threshold`` (using that sample's noise value as cutoff),
    each peak's area is set, and one CSV row per peak is written: retention
    time followed by the peak's mass-spectrum intensities as floats. The
    area, raw spectrum and CSV row of every peak are also printed.

    :param pp_im: iterable of objects accepted by ``BillerBiemann``.
    :param noise: iterable of per-sample cutoffs for ``num_ions_threshold``.
    :param name: iterable of per-sample name strings; each output file is
        named ``<name>2y.csv``.
    :param savePath: directory the CSV files are written to. Defaults to the
        location that used to be hard-coded in the function body.
    :return: list with one filtered peak collection per sample.
    """
    peakz = []

    # Header row: 'RTs' followed by the floats 35.0 .. 220.0
    # (presumably the m/z bins of the spectra -- confirm against the data).
    head = ['RTs'] + [float(mz) for mz in range(35, 221)]

    # Built-in zip() rather than itertools.izip(), which Python 3 lacks.
    for im, n, na in zip(pp_im, noise, name):
        poss_peaks = BillerBiemann(im, points=140, scans=20)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=3, cutoff=n)

        completeName = os.path.join(savePath, na + "2y.csv")
        # The with-block closes the file; no explicit close() is needed.
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            w.writerow(head)
            for peak in nin:
                area = peak_sum_area(im, peak)
                print('area=', area)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                print("Peaks ms_spec: ", list(ms.mass_spec))
                p_rt = peak.get_rt()
                ms_d = [p_rt] + [float(i) for i in ms.mass_spec]
                print(ms_d)
                w.writerow(ms_d)

        peakz.append(nin)

    return peakz
Exemplo n.º 13
0
# Filter parameters for num_ions_threshold below:
# n -- minimum number of ions a peak must have ...
n = 3
# ... with an intensity greater than or equal to the threshold t
t = 10000

# Trim by relative intensity.
# NOTE(review): `peak_list` and `r` are not defined in this excerpt --
# presumably set earlier in the script; confirm.
pl = rel_threshold(peak_list, r)

# Trim by ion-count threshold using n and t from above
real_peak_list = num_ions_threshold(pl, n, t)
print "Number of filtered peaks in real data: ", len(real_peak_list)

# Set the area of every remaining peak from the real intensity matrix
for peak in real_peak_list:
    area = peak_sum_area(real_im, peak)
    peak.set_area(area)


# real_peak_list is PyMS's best guess at the true peak list

################## Run Simulator ######################
# The simulator takes a peak list, a time_list and a mass_list
# and returns an IntensityMatrix object.
# The mass_list and time_list are the same for the real
# data and the simulated data.

time_list = real_im.get_time_list()
mass_list = real_im.get_mass_list()

sim_im = gcms_sim(time_list, mass_list, real_peak_list)
Exemplo n.º 14
0
    def run(self):
        """
        Run quantitative processing for this sample and store the results on ``self``.

        The steps, in order: load the raw data with the reader matching
        ``self.filetype``; build an integer-binned intensity matrix; crop it
        to the method's mass range; optionally smooth and baseline-correct
        each ion chromatogram; detect peaks with ``BillerBiemann``; optionally
        filter them by noise level; drop peaks whose base peak mass is listed
        in ``method.base_peak_filter``; compute peak areas; and build an
        ``Experiment`` restricted to the method's target retention-time range.

        On completion ``self.expr``, ``self.tic``, ``self.filtered_peak_list``
        and the "Date Created" / "Date Modified" entries of
        ``self.properties`` are set. ``self.update_pbar()`` is called after
        each stage, and once per ion chromatogram and per kept peak.

        :return: ``None``. Returns early, before any result is stored, when
            ``self.filetype`` is not one of the recognised formats.
        """
        print("Quantitative Processing in Progress...")

        # TODO: Include data etc. in experiment file

        self.update_pbar()

        # Readers are imported lazily, only for the format actually in use
        if self.filetype == ID_Format_jcamp:
            # Load data using JCAMP_reader
            from pyms.GCMS.IO.JCAMP import JCAMP_reader
            data = JCAMP_reader(self.properties["Original Filename"])

        elif self.filetype == ID_Format_mzML:
            # Load data using MZML_reader
            from pyms.GCMS.IO.MZML import MZML_reader
            data = MZML_reader(self.properties["Original Filename"])

        elif self.filetype == ID_Format_ANDI:
            # Load data using ANDI_reader
            from pyms.GCMS.IO.ANDI import ANDI_reader
            data = ANDI_reader(self.properties["Original Filename"])

        else:
            # Unknown format: nothing is processed and nothing is stored
            return
        # TODO: Waters RAW, Thermo RAW, Agilent .d

        self.update_pbar()

        # Processing parameters come from the method named in the properties
        method = Method.Method(self.properties["Method"])

        self.update_pbar()

        # list of all retention times, in seconds
        # (not used again in this method)
        times = data.get_time_list()
        # get Total Ion Chromatogram
        tic = data.get_tic()
        # RT Range, time step, no. scans, min, max, mean and median m/z
        data.info()

        # Build "intensity matrix" by binning data with integer bins and a
        # window of -0.3 to +0.7, the same as NIST uses
        im = build_intensity_matrix_i(data)

        self.update_pbar()

        # Show the m/z of the maximum and minimum bins
        print(" Minimum m/z bin: {}".format(im.get_min_mass()))
        print(" Maximum m/z bin: {}".format(im.get_max_mass()))

        # Crop masses; only the first two entries of mass_range are used
        min_mass, max_mass, *_ = method.mass_range

        # Clamp the requested range to what the matrix actually contains
        if min_mass < im.get_min_mass():
            min_mass = im.get_min_mass()
        if max_mass > im.get_max_mass():
            max_mass = im.get_max_mass()
        im.crop_mass(min_mass, max_mass)

        self.update_pbar()

        # Perform Data filtering
        n_scan, n_mz = im.get_size()

        # Iterate over each IC in the intensity matrix
        for ii in range(n_mz):
            # print("\rWorking on IC#", ii+1, '  ',end='')
            ic = im.get_ic_at_index(ii)

            if method.enable_sav_gol:
                # Perform Savitzky-Golay smoothing.
                # Note that Turbomass does not use smoothing for qualitative method.
                ic = savitzky_golay(ic)

            if method.enable_tophat:
                # Perform Tophat baseline correction
                # Top-hat baseline correction seems to bring down noise,
                # retaining shapes, but keeps points on actual peaks
                ic = tophat(ic, struct=method.tophat_struct)

            # Set the IC in the intensity matrix to the filtered one
            im.set_ic_at_index(ii, ic)

            self.update_pbar()

        # Peak Detection based on Biller and Biemann (1974), with a window
        # of <points>, and combining <scans> if they apex next to each other
        peak_list = BillerBiemann(im,
                                  points=method.bb_points,
                                  scans=method.bb_scans)

        self.update_pbar()

        print(" Number of peaks identified before filtering: {}".format(
            len(peak_list)))

        if method.enable_noise_filter:
            # Filtering peak lists with automatic noise filtering;
            # the noise level is estimated from the unfiltered TIC
            noise_level = window_analyzer(tic)
            # should we also do rel_threshold() here?
            # https://pymassspec.readthedocs.io/en/master/pyms/BillerBiemann.html#pyms.BillerBiemann.rel_threshold
            peak_list = num_ions_threshold(peak_list, method.noise_thresh,
                                           noise_level)

        self.update_pbar()

        filtered_peak_list = []

        for peak in peak_list:
            # Get mass and intensity lists for the mass spectrum at the apex of the peak
            apex_mass_list = peak.mass_spectrum.mass_list
            apex_mass_spec = peak.mass_spectrum.mass_spec

            # Determine the intensity of the base peak in the mass spectrum
            base_peak_intensity = max(apex_mass_spec)

            # Determine the index of the base peak in the mass spectrum
            # (the first index, if the maximum occurs more than once)
            base_peak_index = [
                index for index, intensity in enumerate(apex_mass_spec)
                if intensity == base_peak_intensity
            ][0]

            # Finally, determine the mass of the base peak
            base_peak_mass = apex_mass_list[base_peak_index]

            # skip the peak if the base peak is at e.g. m/z 73, i.e. septum bleed
            if base_peak_mass in method.base_peak_filter:
                continue

            # Integrate the peak and keep it
            area = peak_sum_area(im, peak)
            peak.set_area(area)
            filtered_peak_list.append(peak)

            self.update_pbar()

        print(" Number of peaks identified: {}".format(
            len(filtered_peak_list)))

        # Create an experiment, limited to the method's target range
        # (the bounds are formatted with an "m" suffix)
        self.expr = Experiment(self.sample_name, filtered_peak_list)
        self.expr.sele_rt_range([
            "{}m".format(method.target_range[0]),
            "{}m".format(method.target_range[1])
        ])

        self.update_pbar()

        current_time = time_now()

        # The date and time the experiment was created
        self.properties["Date Created"] = current_time

        # The date and time the experiment was last modified
        self.properties["Date Modified"] = current_time

        # Push the progress bar, if there is one, to its full range
        if self.pbar:
            self.pbar.Update(self.pbar.Range)

        # Keep the TIC and the kept peaks on the instance
        self.tic = tic
        self.filtered_peak_list = filtered_peak_list
Exemplo n.º 15
0
def import_processing(jcamp_file, spectrum_csv_file, report_csv_file, combined_csv_file, bb_points = 9, bb_scans = 2, noise_thresh = 2, target_range = (0,120), tophat_struct="1.5m", nistpath = "../MSSEARCH", base_peak_filter = ['73'], ExprDir = "."):		
	"""
	Process one JCAMP file: detect peaks, search them against NIST and write reports.

	Steps, in order: read the file; write the binned spectra of every scan to
	``spectrum_csv_file``; smooth and baseline-correct each ion chromatogram;
	detect peaks with ``BillerBiemann`` and filter them by noise level; drop
	peaks whose base peak mass is in ``base_peak_filter`` and compute the
	areas of the rest; save the TIC and peak list; write the largest peaks
	by area, together with their library hits, to ``report_csv_file`` and
	``combined_csv_file``; and store an ``Experiment``.

	:param jcamp_file: path of the input file; its base name without
		extension is used as the sample name.
	:param spectrum_csv_file: output path for the semicolon-separated binned spectra.
	:param report_csv_file: output path for the peak report.
	:param combined_csv_file: output path for the combined report with library hits.
	:param bb_points: ``points`` argument for ``BillerBiemann``.
	:param bb_scans: ``scans`` argument for ``BillerBiemann``.
	:param noise_thresh: second argument for ``num_ions_threshold``.
	:param target_range: two-item range, in minutes; peaks whose retention
		time is not within ``target_range[0] < RT <= target_range[1]`` are
		left out of the reports.
	:param tophat_struct: ``struct`` argument for ``tophat``.
	:param nistpath: value stored in the module-level ``nist_path``.
	:param base_peak_filter: values convertible to ``int``; a peak is
		skipped when its base peak mass is one of them. The default list is
		only read, never modified.
	:param ExprDir: directory for the TIC, peak list and experiment files.
	:return: always ``0``.
	"""
	# Publish the NIST path as a module-level global
	global nist_path
	nist_path = nistpath
	
	# Parameters
	base_peak_filter = [int(x) for x in base_peak_filter]
	target_range = tuple(target_range)
	sample_name = os.path.splitext(os.path.basename(jcamp_file))[0]
	# maximum number of rows kept for the reports
	number_of_peaks = 80
	
	data = JCAMP_reader(jcamp_file)
	
	# list of all retention times, in seconds
	times = data.get_time_list()
	# get Total Ion Chromatogram
	tic = data.get_tic() 
	# RT Range, time step, no. scans, min, max, mean and median m/z
	data.info()
	
	#data.write("output/data") # save output
	
	# Mass Binning	
	im = build_intensity_matrix_i(data) # convert to intensity matrix
	#im.get_size() #number of scans, number of bins
	masses = im.get_mass_list() # list of mass bins (not used again below)
	
	print(" Minimum m/z bin: {}".format(im.get_min_mass()))
	print(" Maximum m/z bin: {}".format(im.get_max_mass()))
	
	# Write Binned Mass Spectra to OpenChrom-like CSV file
	ms = im.get_ms_at_index(0) # first mass spectrum, used for the header masses
	# NOTE(review): this file is not opened in a with-block, so it stays open
	# if anything below raises before close()
	spectrum_csv = open(spectrum_csv_file, 'w')
	spectrum_csv.write('RT(milliseconds);RT(minutes) - NOT USED BY IMPORT;RI;')
	spectrum_csv.write(';'.join(str(mz) for mz in ms.mass_list))
	spectrum_csv.write("\n")
		
	# One row per scan: RT in ms, RT in minutes, a constant 0, then intensities
	for scan in range(len(times)):
		spectrum_csv.write("{};{};{};".format(int(times[scan]*1000),rounders((times[scan]/60),"0.0000000000"),0))	
		ms = im.get_ms_at_index(scan)
		spectrum_csv.write(';'.join(str(intensity) for intensity in ms.mass_spec))
		spectrum_csv.write('\n')
	spectrum_csv.close()
	
	## Data filtering

	# Note that Turbomass does not use smoothing for qualitative method.	
	# Top-hat baseline correction seems to bring down noise,
	#  retaining shapes, but keeps points on actual peaks
	
	#dump_object(im, "output/im.dump") # un-processed output

	# Smooth and baseline-correct every ion chromatogram in place
	n_scan, n_mz = im.get_size()
	for ii in range(n_mz):
		#print("\rWorking on IC#", ii+1, '  ',end='')
		ic = im.get_ic_at_index(ii)
		ic_smooth = savitzky_golay(ic)
		ic_bc = tophat(ic_smooth, struct=tophat_struct)
		im.set_ic_at_index(ii, ic_bc)

	#dump_object(im, "output/im-proc.dump") # processed output
		
	# Peak Detection based on Biller and Biemann, 1974, with a window
	#  of n points, and combining y scans if they apex next to each other
	peak_list = BillerBiemann(im, points=bb_points, scans=bb_scans) 
	
	print(" Number of peaks identified before filtering: {}".format(len(peak_list)))
	
	# Filtering peak lists with automatic noise filtering
	noise_level = window_analyzer(tic)
	peak_list = num_ions_threshold(peak_list, noise_thresh, noise_level)
	# why use 2 for number of ions above threshold?
	print(" Number of peaks identified: {}".format(len(peak_list)))

	# Peak Areas
	# peak_area_list and filtered_peak_list are kept index-aligned
	peak_area_list = []
	filtered_peak_list = []
	
	for peak in peak_list:
		apex_mass_list = peak.get_mass_spectrum().mass_list
		apex_mass_spec = peak.get_mass_spectrum().mass_spec
		# base peak = most intense ion of the apex spectrum (first one on ties)
		base_peak_intensity = max(apex_mass_spec)
		base_peak_index = [index for index, intensity in enumerate(apex_mass_spec) if intensity == base_peak_intensity][0]
		base_peak_mass = apex_mass_list[base_peak_index]
		#print(base_peak_mass)
		if base_peak_mass in base_peak_filter:
			continue # skip the peak if the base peak is at e.g. m/z 73, i.e. septum bleed
		
		area = peak_sum_area(im, peak)
		peak.set_area(area)
		peak_area_list.append(area)
		filtered_peak_list.append(peak)
	
	# Save the TIC and Peak List
	tic.write(os.path.join(ExprDir,"{}_tic.dat".format(sample_name)),formatting=False)
	store_peaks(filtered_peak_list,os.path.join(ExprDir,"{}_peaks.dat".format(sample_name)))
	
	# from https://stackoverflow.com/questions/16878715/how-to-find-the-index-of-n-largest-elements-in-a-list-or-np-array-python?lq=1
	# Indices into filtered_peak_list, ordered by ascending peak area
	top_peaks = sorted(range(len(peak_area_list)), key=lambda x: peak_area_list[x])
	
	# Write to turbomass-like CSV file
	report_csv = open(report_csv_file, "w")
	
	# Write to GunShotMatch Combine-like CSV file
	combine_csv = open(combined_csv_file, "w")
	
	combine_csv.write(sample_name)
	combine_csv.write("\n")
		
	report_csv.write("#;RT;Scan;Height;Area\n")
	combine_csv.write("Retention Time;Peak Area;;Lib;Match;R Match;Name;CAS Number;Scan\n")
	
	report_buffer = []
	
	for index in top_peaks:
		# Peak number: 1-based position of this peak in the area ordering
		# NOTE(review): list.index() inside the loop is a linear search per
		# peak; enumerate() would give the same position directly
		peak_number = top_peaks.index(index)+1 
		# Retention time (minutes, 3dp)
		RT = rounders(filtered_peak_list[index].get_rt()/60,"0.000") 
		
		if not target_range[0] < RT <= target_range[1]:
			continue # skip the peak if it is outside the desired range
		
		# scan number, not that we really need it as the peak object has the spectrum
		Scan = data.get_index_at_time(filtered_peak_list[index].get_rt())+1 
		# the binned mass spectrum (the result is not used)
		filtered_peak_list[index].get_mass_spectrum() 
		# TIC intensity, as proxy for Peak height, which should be from baseline
		Height = '{:,}'.format(rounders(tic.get_intensity_at_index(data.get_index_at_time(filtered_peak_list[index].get_rt())),"0"))
		# Peak area, originally in "intensity seconds", so dividing by 60 to
		#  get "intensity minutes" like turbomass uses
		Area = '{:,}'.format(rounders(filtered_peak_list[index].get_area()/60,"0.0")) 
		
		#report_csv.write("{};{};{};{};{};{}\n".format(peak_number, RT, Scan, Height, Area,bounds))
		report_buffer.append([peak_number, RT, Scan, Height, Area])

	report_buffer = report_buffer[::-1] # Reverse list order, so the largest area comes first

	# List of peaks already added to report (never filled or read below)
	existing_peaks = []

	filtered_report_buffer = []
	
	# Plain copy of the buffer; no rows are filtered out here
	for row in report_buffer:
		filtered_report_buffer.append(row)
	
	# Keep at most number_of_peaks rows, i.e. the largest peaks by area
	filtered_report_buffer = filtered_report_buffer[:number_of_peaks]
	
	# Re-order the kept rows by scan number (row item 2)
	filtered_report_buffer.sort(key=operator.itemgetter(2))
	
	for row in filtered_report_buffer:
		# NOTE(review): index() returns the first equal row, so two identical
		# rows would get the same page number -- confirm this cannot happen
		index = filtered_report_buffer.index(row)
		report_csv.write(";".join([str(i) for i in row]))
		
		# Scan numbers in the report are 1-based; the matrix index is 0-based
		ms = im.get_ms_at_index(row[2]-1)
		
		create_msp("{}_{}".format(sample_name,row[1]),ms.mass_list, ms.mass_spec)
		matches_dict = nist_ms_comparison("{}_{}".format(sample_name,row[1]),ms.mass_list, ms.mass_spec)
		
		combine_csv.write("{};{};Page {} of 80;;;;;;{}\n".format(row[1],row[4],index+1,row[2]))
		
		# Hits 1 to 5 are written to both reports
		for hit in range(1,6):
			report_csv.write(str(matches_dict["Hit{}".format(hit)]))
			report_csv.write(";")
			combine_csv.write(";;{};{};{};{};{};{};\n".format(hit,
					matches_dict["Hit{}".format(hit)]["Lib"],
					matches_dict["Hit{}".format(hit)]["MF"],
					matches_dict["Hit{}".format(hit)]["RMF"],
					matches_dict["Hit{}".format(hit)]["Name"],
					matches_dict["Hit{}".format(hit)]["CAS"],
					))

		report_csv.write("\n")
		
		# NOTE(review): fixed 2 s pause after every searched peak; presumably
		# it gives the external NIST search time to settle -- confirm
		time.sleep(2)
		
	report_csv.close()
	combine_csv.close()
	
	# Create an experiment, limited to the target range (in minutes), and store it
	expr = Experiment(sample_name, filtered_peak_list)
	expr.sele_rt_range(["{}m".format(target_range[0]),"{}m".format(target_range[1])])
	store_expr(os.path.join(ExprDir,"{}.expr".format(sample_name)), expr)
	
	return 0
Exemplo n.º 16
0
# Filter parameters for num_ions_threshold below:
# n -- minimum number of ions a peak must have ...
n = 3
# ... with an intensity greater than or equal to the threshold t
t = 10000

# Trim by relative intensity.
# NOTE(review): `peak_list` and `r` are not defined in this excerpt --
# presumably set earlier in the script; confirm.
pl = rel_threshold(peak_list, r)

# Trim by ion-count threshold using n and t from above
real_peak_list = num_ions_threshold(pl, n, t)
print("Number of filtered peaks in real data: ", len(real_peak_list))

# Set the area of every remaining peak from the real intensity matrix
for peak in real_peak_list:
    area = peak_sum_area(real_im, peak)
    peak.area = area

# real_peak_list is PyMassSpec's best guess at the true peak list

################## Run Simulator ######################
# The simulator takes a peak list, a time_list and a mass_list
# and returns an IntensityMatrix object.
# The mass_list and time_list are the same for the real
# data and the simulated data.

time_list = real_im.time_list
mass_list = real_im.mass_list

sim_im = gcms_sim(time_list, mass_list, real_peak_list)
# sim_im is an IntensityMatrix object
Exemplo n.º 17
0
def test_align_2_alignments(A1, pyms_datadir, tmp_pathplus):
    """
    Build an alignment from the ``geco_codes`` JCAMP files, align it against
    the ``A1`` fixture, and write the resulting CSV and peak files to
    ``tmp_pathplus``.
    """
    experiments = []

    for code in geco_codes:
        jcamp_path = pyms_datadir / f"{code}.JDX"
        matrix = build_intensity_matrix_i(JCAMP_reader(jcamp_path))

        # Intensity matrix size is (scans, masses); only the mass count is used
        _, mass_count = matrix.size

        # Smooth and baseline-correct every ion chromatogram in place
        for idx in range(mass_count):
            smoothed = savitzky_golay(matrix.get_ic_at_index(idx))
            matrix.set_ic_at_index(idx, tophat(smoothed, struct="1.5m"))

        detected = BillerBiemann(matrix, points=9, scans=2)
        peaks = num_ions_threshold(rel_threshold(detected, 2), 3, 3000)

        for pk in peaks:
            # Restrict the mass range and ignore the TMS ions
            pk.crop_mass(50, 400)
            for tms_mass in (73, 147):
                pk.null_mass(tms_mass)

            # Total area first, then the per-ion areas
            pk.area = peak_sum_area(matrix, pk)
            pk.ion_areas = peak_top_ion_areas(matrix, pk)

        experiment = Experiment(code, peaks)

        # Same retention time window for every experiment
        experiment.sele_rt_range(["6.5m", "21m"])
        experiments.append(experiment)

    # Alignment of the experiments built above
    pairwise_within = PairwiseAlignment(exprl2alignment(experiments), Dw, Gw)
    A2 = align_with_tree(pairwise_within, min_peaks=2)

    # Between-replicates alignment parameters
    rt_modulation = 10.0
    gap_penalty = 0.30

    print("Aligning input {1,2}")
    pairwise_between = PairwiseAlignment([A1, A2], rt_modulation, gap_penalty)
    combined = align_with_tree(pairwise_between)

    rt_csv = tmp_pathplus / "rt.csv"
    area_csv = tmp_pathplus / "area.csv"
    combined.write_csv(rt_csv, area_csv)

    # Drop falsy entries before storing the aligned peaks
    kept_peaks = [entry for entry in combined.aligned_peaks() if entry]
    store_peaks(kept_peaks, tmp_pathplus / "peaks.bin")

    common_ions = combined.common_ion()
    combined.write_common_ion_csv(tmp_pathplus / "area.csv", common_ions)
Exemplo n.º 18
0
 def test_max_bound_errors(self, im_i, peak, obj):
     """Expect ``TypeError`` when ``obj`` is passed as ``max_bound`` to ``peak_sum_area``."""
     expects_type_error = pytest.raises(TypeError)
     with expects_type_error:
         peak_sum_area(im_i, peak, max_bound=obj)
Exemplo n.º 19
0
# Script fragment: detect peaks on pre-trimmed data, filter them, set their
# areas and store the result as an experiment.
# NOTE(review): `im`, `points`, `scans`, `r`, `n` and `t` are defined before
# this excerpt.

# get the list of Peak objects
pl = BillerBiemann(im, points, scans)

# trim by relative intensity
apl = rel_threshold(pl, r)

# trim by threshold
peak_list = num_ions_threshold(apl, n, t)

# print() as a function call: the bare print statement is a SyntaxError on Python 3
print("Number of Peaks found:", len(peak_list))

# ignore TMS ions and set mass range
for peak in peak_list:
    peak.crop_mass(50,540)
    peak.null_mass(73)
    peak.null_mass(147)
    # find area
    area = peak_sum_area(im, peak)
    peak.set_area(area)

# create an experiment
expr = Experiment("a0806_077", peak_list)

# set time range for all experiments
expr.sele_rt_range(["6.5m", "21m"])

store_expr("output/a0806_077.expr", expr)
Exemplo n.º 20
0
	def quantitative_processing(self, jcamp_file, log_stdout=True):
		"""
		Import a JCAMP-DX file, detect and filter its peaks, and store the results.

		The TIC, the filtered peak list and an Experiment for the sample are
		written to ``self.config.expr_dir``.

		:param jcamp_file: Path of the JCAMP-DX file to process. The sample name is
			the base name of this path without its extension.
		:type jcamp_file: path as accepted by ``os.path.basename`` and ``JCAMP_reader``
		:param log_stdout: If true, output printed during processing is written to
			``<sample_name>.log`` in ``self.config.log_dir``. The previous
			``sys.stdout`` is restored and the log file closed before returning,
			including when processing raises.
		:type log_stdout: bool

		:return: None
		:rtype: None
		"""

		# Determine the name of the sample from the filename
		sample_name = os.path.splitext(os.path.basename(jcamp_file))[0]

		if not log_stdout:
			self._quantitative_processing_impl(jcamp_file, sample_name)
			return

		# Log stdout to file for the duration of the processing only
		log_path = os.path.join(self.config.log_dir, sample_name + ".log")
		previous_stdout = sys.stdout
		with open(log_path, "w") as log_file:
			sys.stdout = log_file
			try:
				self._quantitative_processing_impl(jcamp_file, sample_name)
			finally:
				# Always hand stdout back, otherwise later prints would go to
				# a file that the ``with`` block is about to close
				sys.stdout = previous_stdout

	def _quantitative_processing_impl(self, jcamp_file, sample_name):
		"""
		Run the processing pipeline for one sample and write its output files.

		:param jcamp_file: Path of the JCAMP-DX file to load.
		:param sample_name: Name used for the output files and the Experiment.
		"""

		# Load data using JCAMP_reader
		data = JCAMP_reader(jcamp_file)

		# get Total Ion Chromatogram
		tic = data.get_tic()
		# RT Range, time step, no. scans, min, max, mean and median m/z
		data.info()

		# Build "intensity matrix" by binning data with integer bins and a
		# 	window of -0.3 to +0.7, the same as NIST uses
		im = build_intensity_matrix_i(data)

		# Show the m/z of the maximum and minimum bins
		print(" Minimum m/z bin: {}".format(im.get_min_mass()))
		print(" Maximum m/z bin: {}".format(im.get_max_mass()))

		# Crop masses, clamping the configured range to what the matrix contains
		min_mass, max_mass, *_ = self.config.mass_range
		min_mass = max(min_mass, im.get_min_mass())
		max_mass = min(max_mass, im.get_max_mass())
		im.crop_mass(min_mass, max_mass)

		# Perform Data filtering
		n_scan, n_mz = im.get_size()

		# Iterate over each IC in the intensity matrix
		for ii in range(n_mz):
			ic = im.get_ic_at_index(ii)

			# Perform Savitzky-Golay smoothing.
			# Note that Turbomass does not use smoothing for qualitative method.
			ic_smooth = savitzky_golay(ic)

			# Perform Tophat baseline correction
			# Top-hat baseline Correction seems to bring down noise,
			#  		retaining shapes, but keeps points on actual peaks
			ic_bc = tophat(ic_smooth, struct=self.config.tophat_struct)

			# Set the IC in the intensity matrix to the filtered one
			im.set_ic_at_index(ii, ic_bc)

		# Peak Detection based on Biller and Biemann (1974), with a window
		# 	of <points>, and combining <scans> if they apex next to each other
		peak_list = BillerBiemann(im, points=self.config.bb_points, scans=self.config.bb_scans)

		print(" Number of peaks identified before filtering: {}".format(len(peak_list)))

		# Filtering peak lists with automatic noise filtering
		noise_level = window_analyzer(tic)
		# should we also do rel_threshold() here?
		# https://pymassspec.readthedocs.io/en/master/pyms/BillerBiemann.html#pyms.BillerBiemann.rel_threshold
		peak_list = num_ions_threshold(peak_list, self.config.noise_thresh, noise_level)

		filtered_peak_list = []

		for peak in peak_list:
			# Mass and intensity lists for the mass spectrum at the apex of the peak
			apex_mass_list = peak.mass_spectrum.mass_list
			apex_mass_spec = peak.mass_spectrum.mass_spec

			# Mass of the base peak: the mass paired with the highest intensity.
			# max() returns the first maximal pair, so ties resolve to the
			# lowest index.
			base_peak_mass, _ = max(
				zip(apex_mass_list, apex_mass_spec),
				key=lambda pair: pair[1],
			)

			# skip the peak if the base peak is at e.g. m/z 73, i.e. septum bleed
			if base_peak_mass in self.config.base_peak_filter:
				continue

			area = peak_sum_area(im, peak)
			peak.set_area(area)
			filtered_peak_list.append(peak)

		print(" Number of peaks identified: {}".format(len(filtered_peak_list)))

		# Save the TIC and Peak List
		tic.write(os.path.join(self.config.expr_dir, "{}_tic.dat".format(sample_name)), formatting=False)
		store_peaks(filtered_peak_list, os.path.join(self.config.expr_dir, "{}_peaks.dat".format(sample_name)))

		# Create an experiment
		expr = Experiment(sample_name, filtered_peak_list)
		expr.sele_rt_range(["{}m".format(self.config.target_range[0]), "{}m".format(self.config.target_range[1])])
		store_expr(os.path.join(self.config.expr_dir, "{}.expr".format(sample_name)), expr)
Exemplo n.º 21
0
# Script fragment: filter a peak list, report each peak's area, then prepare
# the baseline-corrected TIC.
# NOTE(review): `peak_list`, `r`, `n`, `im` and `data` are defined before this
# excerpt.

# greater than or equal to threshold, t
t = 4000
# trim by relative intensity
pl = rel_threshold(peak_list, r)

# trim by threshold
new_peak_list = num_ions_threshold(pl, n, t)

print("Number of filtered peaks: ", len(new_peak_list))
print("Peak areas")
# Column header for the per-peak lines printed in the loop below
print("UID, RT, height, area")
for peak in new_peak_list:
    rt = peak.rt
    
    # determine and set area
    area = peak_sum_area(im, peak)
    peak.area = area

    # print some details
    UID = peak.UID
    # height as sum of the intensities of the apexing ions
    height = sum(peak.get_mass_spectrum().mass_spec.tolist())
    # UID is concatenated with `+`, so it must be a string here
    print(UID + f", {rt:.2f}, {height:.2f}, {peak.area:.2f}")

# TIC from raw data
tic = data.get_tic()
# baseline correction for TIC
tic_bc = tophat(tic, struct="1.5m")

# Get Ion Chromatograms for all m/z channels
# (only the channel count is computed in the visible part of the script)
n_mz = len(im.get_mass_list())
Exemplo n.º 22
0
def Peak_detector(pp_im, noise, name, points, scans, percent, ni, name_tag, sdir):
    """
    Intake cleansed intensity matrices and CMD args
    Produces list of peaks and corresponding mass spectrum of each sample

    Matrices, noise levels and names are processed in lockstep; processing
    stops at the shortest of the three inputs.

    @param pp_im: Cleansed intensity matrices from the Preprocess_IntensityMatrices method
                  (any iterable, including a one-shot iterator)
    @param noise: Noise level approximation produced by the matrix_from_cdf method, one per sample
    @param name: Sample names used for creating the mass spectrum .csv files, one per sample
    @param points: Size of window use for peak detection in BillerBiemann method
    @param scans: Number of adjacent windows to compare for peak detection in BillerBiemann method
    @param percent: Percentile threshold a peak must exceed to be considered an informative peak
    @param ni: Number of ions required per peak to be considered an informative peak
    @param name_tag: String consisting of CMD args for identification, ie. 'p140s25%3n3'
    @param sdir: Directory to save the mass spectrum .csv files
    @return: Two-element list: [list of peak lists per sample, list of written .csv paths]
    """

    # Materialise once: pp_im may be a one-shot iterator and is needed both
    # for the length report and for the processing loop below.
    intensity_matrices = list(pp_im)

    peakz = []
    ms_data_files = []

    print("len pp_im", len(intensity_matrices))
    print("len noise", len(noise))
    print("len name", len(name), name)

    # Header row: area, retention time, then the floats 35.0 .. 220.0
    # (presumably the m/z channels of the spectrum; confirm against the data)
    head = ['Area', 'RTs'] + [float(i) for i in range(35, 221)]

    # Built-in zip: itertools.izip does not exist on Python 3
    for im, noise_level, sample_name in zip(intensity_matrices, noise, name):

        poss_peaks = BillerBiemann(im, points=points, scans=scans)
        above_threshold = rel_threshold(poss_peaks, percent=percent)
        nin = num_ions_threshold(above_threshold, n=ni, cutoff=noise_level)

        completeName = os.path.join(sdir, sample_name + name_tag + "ms_data.csv")
        # newline='' lets the csv module control line endings itself
        with open(completeName, 'w', newline='') as f:
            w = csv.writer(f)
            w.writerow(head)

            for peak in nin:
                area = peak_sum_area(im, peak)
                peak.set_area(area)

                # One row per peak: area, retention time, then the spectrum
                # intensities as plain floats
                intensities = [float(value) for value in peak.get_mass_spectrum().mass_spec]
                w.writerow([area, peak.get_rt()] + intensities)

        peakz.append(nin)
        ms_data_files.append(completeName)
    print('ms_data_files:', ms_data_files)

    return [peakz, ms_data_files]