Exemplo n.º 1
0
    def aligned_peaks(self, minutes=False):
        """
        @summary: Builds one composite peak per alignment position by
            combining every peak that was aligned at that position.

        @param minutes: Passed unchanged to composite_peak() as its
            second argument; an indicator of whether retention times
            are in minutes (if False, retention times are in seconds)
        @type minutes: BooleanType

        @return: A list holding one composite_peak() result per
            alignment position, in position order.
        @rtype: ListType

        @author: Andrew Isaac
        """

        # the first row of peakpos fixes the number of positions
        position_count = len(self.peakpos[0])
        composites = []

        for position in range(position_count):
            # one candidate per row of peakpos; None marks a missing peak
            candidates = (row[position] for row in self.peakpos)
            present = [peak for peak in candidates if peak is not None]
            composites.append(composite_peak(present, minutes))

        return composites
Exemplo n.º 2
0
    def aligned_peaks(self, minutes: bool = False) -> List:
        """
        Returns a list of composite peaks, one per alignment position, each
        built from all the peaks that aligned at that position.

        :param minutes: Accepted for backwards compatibility only. The value
            is not read by this method and is not forwarded to
            ``composite_peak``.
        :type minutes: bool, optional

        :return: A list with one ``composite_peak`` result per alignment
            position, in position order.
        :rtype: list

        :author: Andrew Isaac
        """

        # TODO: minutes currently does nothing

        composites = []

        for position in range(len(self.peakpos[0])):
            # gather the peaks aligned at this position, skipping the
            # None placeholders that mark a missing peak
            candidates = (row[position] for row in self.peakpos)
            present = [peak for peak in candidates if peak is not None]

            composites.append(composite_peak(present))

        return composites
Exemplo n.º 3
0
    def aligned_peaks(self, minutes=False):
        """
        @summary: Returns the composite peaks of the alignment: for
            every alignment position the peaks present at that
            position are handed to composite_peak().

        @param minutes: An optional indicator of whether retention
            times are in minutes (if False, retention times are in
            seconds); forwarded as-is to composite_peak()
        @type minutes: BooleanType

        @return: A list of composite peaks, one per alignment position.
        @rtype: ListType

        @author: Andrew Isaac
        """

        def peaks_at(position):
            # collect the peaks found at this position, ignoring the
            # None entries that stand for missing peaks
            found = []
            for row in self.peakpos:
                entry = row[position]
                if entry is not None:
                    found.append(entry)
            return found

        return [composite_peak(peaks_at(position), minutes)
                for position in range(len(self.peakpos[0]))]
Exemplo n.º 4
0
def test_write_ion_areas_csv(A1, tmp_pathplus):
    """
    Check the '|'-delimited ion areas files written by
    ``A1.write_ion_areas_csv``.

    The file is written twice, once with the default arguments and once
    with ``minutes=False``. Both outputs are compared cell by cell against
    values recomputed from ``A1.peakpos``.
    """

    minutes_path = tmp_pathplus / "alignment_ion_areas.csv"
    seconds_path = tmp_pathplus / "alignment_ion_areas_seconds.csv"

    A1.write_ion_areas_csv(minutes_path)
    A1.write_ion_areas_csv(seconds_path, minutes=False)

    # Read alignment_ion_areas.csv and check values
    assert minutes_path.exists()

    # Read both files inside context managers so the handles are closed
    # deterministically instead of being left for the garbage collector.
    with minutes_path.open() as fp:
        ion_csv = list(csv.reader(fp, delimiter='|'))
    with seconds_path.open() as fp:
        seconds_ion_csv = list(csv.reader(fp, delimiter='|'))

    assert ion_csv[0][0:2] == seconds_ion_csv[0][0:2] == ["UID", "RTavg"]
    assert ion_csv[0][2:] == seconds_ion_csv[0][2:] == A1.expr_code

    for peak_idx in range(len(A1.peakpos[0])):  # one CSV row per position
        row = peak_idx + 1  # row 0 is the header
        new_peak_list = []

        for align_idx in range(len(A1.peakpos)):
            peak = A1.peakpos[align_idx][peak_idx]

            if peak is None:
                continue

            col = align_idx + 2  # columns 0 and 1 hold UID and RTavg

            # Expected cell text: the (mass, floored area) pairs sorted by
            # area, largest first, rendered with str().
            ia = peak.ion_areas
            ia.update((mass, math.floor(intensity))
                      for mass, intensity in ia.items())
            sorted_ia = sorted(ia.items(),
                               key=operator.itemgetter(1),
                               reverse=True)

            assert ion_csv[row][col] == str(sorted_ia)
            assert seconds_ion_csv[row][col] == str(sorted_ia)

            new_peak_list.append(peak)

        compo_peak = composite_peak(new_peak_list)
        assert compo_peak is not None

        assert ion_csv[row][0] == seconds_ion_csv[row][0] == compo_peak.UID

        # The default output divides the composite retention time by 60;
        # the minutes=False output uses it unchanged.
        assert ion_csv[row][1] == f"{float(compo_peak.rt / 60):.3f}"
        assert seconds_ion_csv[row][1] == f"{float(compo_peak.rt):.3f}"
Exemplo n.º 5
0
    def write_excel(self, excel_file_name, minutes=True):
        """
        @summary: Writes the alignment to an excel file, with colouring
            showing possible mis-alignments

        The sheet has one row per alignment position and one column per
        entry of self.expr_code. Each cell holds the retention time of
        the peak (or 'NA' when the peak is missing) and carries a
        comment with the peak area and its scaled ion areas.

        @param excel_file_name: The name for the retention time alignment file
        @type excel_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds
        @type minutes: BooleanType

        @author: David Kainer
        """
        wb = Workbook()
        ws = wb.active
        ws.title = "Aligned RT"

        # create header row
        ws['A1'] = "UID"
        ws['B1'] = "RTavg"
        for i, item in enumerate(self.expr_code):
            currcell = ws.cell(row=1, column=i + 3, value="%s" % item)
            currcell.comment = Comment('sample ' + str(i), 'dave')

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):    # rows

            new_peak_list = []

            for align_idx in range(len(self.peakpos)):  # columns
                peak = self.peakpos[align_idx][peak_idx]

                if peak is None:
                    currcell = ws.cell(row=2 + peak_idx,
                                       column=3 + align_idx, value='NA')
                    currcell.comment = Comment("Area: NA", 'dave')
                    continue

                if minutes:
                    rt = peak.get_rt() / 60.0
                else:
                    rt = peak.get_rt()

                area = peak.get_area()
                new_peak_list.append(peak)

                # write the RT into the cell in the excel file
                currcell = ws.cell(row=2 + peak_idx, column=3 + align_idx,
                                   value=round(rt, 3))

                # get the mini-mass spec for this peak, and divide the ion
                # intensities by 1000 to shorten them. The scaled pairs are
                # built on the fly instead of being written back into the
                # dict, so the object returned by get_ion_areas() is never
                # modified (if it is the peak's own storage, repeated
                # exports would otherwise keep dividing by 1000).
                # items() is used because iteritems() only exists on
                # Python 2 dicts.
                ia = peak.get_ion_areas()
                sorted_ia = sorted(
                    ((mass, int(intensity / 1000))
                     for mass, intensity in ia.items()),
                    key=operator.itemgetter(1), reverse=True)

                # write the peak area and mass spec into the cell comment
                currcell.comment = Comment(
                    "Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            ws.cell(row=2 + peak_idx, column=1, value=peak_UID_string)
            # NOTE(review): RTavg is always get_rt()/60 here, whatever the
            # value of `minutes` - confirm this is intended.
            ws.cell(row=2 + peak_idx, column=2,
                    value="%.3f" % float(compo_peak.get_rt() / 60))

        # colour the cells in each row based on their RT percentile for
        # that row (the range starts at column 3, after UID and RTavg)
        first_col = utils.get_column_letter(3)
        for i, row in enumerate(ws.rows, 1):
            last_col = utils.get_column_letter(len(row))
            cell_range = "%s%d:%s%d" % (first_col, i, last_col, i)
            ws.conditional_formatting.add(
                cell_range,
                ColorScaleRule(
                    start_type='percentile', start_value=1,
                    start_color='E5FFCC',
                    mid_type='percentile', mid_value=50,
                    mid_color='FFFFFF',
                    end_type='percentile', end_value=99,
                    end_color='FFE5CC'))
        wb.save(excel_file_name)
Exemplo n.º 6
0
    def write_csv(self,
                  rt_file_name: Union[str, pathlib.Path],
                  area_file_name: Union[str, pathlib.Path],
                  minutes: bool = True):
        """
        Writes the alignment to CSV files

        This function writes two files: one containing the alignment of peak
        retention times and the other containing the alignment of peak areas.
        Both files share the same header and the same two leading columns
        (the UID and average retention time of the composite peak); missing
        peaks are written as ``NA``.

        :param rt_file_name: The name for the retention time alignment file
        :type rt_file_name: str or pathlib.Path
        :param area_file_name: The name for the areas alignment file
        :type area_file_name: str or pathlib.Path
        :param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds
        :type minutes: bool, optional

        :raises TypeError: if either file name is neither a string nor a
            pathlib.Path object

        :author: Woon Wai Keen
        :author: Andrew Isaac
        :author: Vladimir Likic
        :author: David Kainer
        :author: Dominic Davis-Foster (pathlib support)
        """

        if not isinstance(rt_file_name, (str, pathlib.Path)):
            raise TypeError(
                "'rt_file_name' must be a string or a pathlib.Path object")

        if not isinstance(area_file_name, (str, pathlib.Path)):
            raise TypeError(
                "'area_file_name' must be a string or a pathlib.Path object")

        rt_file_name = prepare_filepath(rt_file_name)
        area_file_name = prepare_filepath(area_file_name)

        # create header
        header = ['UID', 'RTavg']
        header.extend(f'"{item}"' for item in self.expr_code)
        header_line = ",".join(header) + "\n"

        # Both files are managed by a single `with` so that they are
        # closed even when composing a row raises part-way through.
        with rt_file_name.open("w") as fp1, area_file_name.open("w") as fp2:
            # write headers
            fp1.write(header_line)
            fp2.write(header_line)

            # for each alignment position write alignment's peak and area
            for peak_idx in range(len(self.peakpos[0])):  # rows

                rts = []
                areas = []
                new_peak_list = []

                for align_idx in range(len(self.peakpos)):
                    peak = self.peakpos[align_idx][peak_idx]

                    if peak is not None:
                        rts.append(peak.rt / 60.0 if minutes else peak.rt)
                        areas.append(peak.area)
                        new_peak_list.append(peak)
                    else:
                        rts.append(None)
                        areas.append(None)

                compo_peak = composite_peak(new_peak_list)

                # the UID / RTavg columns are identical in both files
                if minutes:
                    rt_avg = f"{float(compo_peak.rt / 60):.3f}"
                else:
                    rt_avg = f"{compo_peak.rt:.3f}"
                leading = [compo_peak.UID, rt_avg]

                # write to retention times file
                rt_cells = [
                    "NA" if rt is None or numpy.isnan(rt) else f"{rt:.3f}"
                    for rt in rts
                ]
                fp1.write(",".join(leading + rt_cells) + "\n")

                # write to peak areas file
                area_cells = [
                    "NA" if area is None else f"{area:.0f}"
                    for area in areas
                ]
                fp2.write(",".join(leading + area_cells) + "\n")
Exemplo n.º 7
0
    def write_ion_areas_csv(self, ms_file_name, minutes=True):
        """
        @summary: Writes the ion areas of the aligned peaks to a
            '|'-delimited text file

        The header holds "UID", "RTavg" and the entries of
        self.expr_code. Each following row holds the UID of the
        composite peak, the mean retention time of the peaks present
        at that position, and for every experiment either the list of
        (mass, floored area) pairs sorted by area in descending order
        or NA when the peak is missing.

        @param ms_file_name: The name of the output file
        @type ms_file_name: StringType
        @param minutes: If True, each retention time is divided by 60
            before averaging; if False it is used unchanged
        @type minutes: BooleanType
        """
        try:
            fp1 = open(ms_file_name, "w")  #dk
        except IOError:
            error("Cannot open output file for writing")

        # the context manager closes the file even if a row fails to build
        with fp1:
            # create header
            header = ['"UID"', '"RTavg"']
            header.extend('"%s"' % item for item in self.expr_code)
            fp1.write("|".join(header) + "\n")  #dk

            for peak_idx in range(len(self.peakpos[0])):

                ias = []
                new_peak_list = []
                avgrt = 0
                countrt = 0

                for align_idx in range(len(self.peakpos)):

                    peak = self.peakpos[align_idx][peak_idx]

                    if peak is None:
                        ias.append(None)
                        continue

                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    # Build the floored pairs on the fly instead of
                    # writing them back, so the dict returned by
                    # get_ion_areas() is left untouched. items() is used
                    # because iteritems() only exists on Python 2 dicts.
                    ia = peak.get_ion_areas()
                    sorted_ia = sorted(
                        ((mass, math.floor(intensity))
                         for mass, intensity in ia.items()),
                        key=operator.itemgetter(1),
                        reverse=True)
                    ias.append(sorted_ia)
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1

                # with no peak at this position avgrt stays 0
                if countrt > 0:
                    avgrt = avgrt / countrt

                compo_peak = composite_peak(new_peak_list, minutes)
                peak_UID_string = '"%s"' % compo_peak.get_UID()

                # write to ms file
                fp1.write(peak_UID_string)
                fp1.write("|%.3f" % avgrt)
                for ia in ias:
                    if ia is None:
                        fp1.write("|NA")
                    else:
                        fp1.write("|%s" % ia)
                fp1.write("\n")
Exemplo n.º 8
0
    def write_ion_areas_csv(self, ms_file_name, minutes=True):
        """
        @summary: Writes the ion areas of the aligned peaks to a
            '|'-delimited text file

        The header holds "UID", "RTavg" and the entries of
        self.expr_code. Each following row holds the UID of the
        composite peak, the mean retention time of the peaks present
        at that position, and for every experiment either the list of
        (mass, floored area) pairs sorted by area in descending order
        or NA when the peak is missing.

        @param ms_file_name: The name of the output file
        @type ms_file_name: StringType
        @param minutes: If True, each retention time is divided by 60
            before averaging; if False it is used unchanged
        @type minutes: BooleanType
        """
        try:
            fp1 = open(ms_file_name, "w")   #dk
        except IOError:
            error("Cannot open output file for writing")

        # the context manager closes the file even if a row fails to build
        with fp1:
            # create header
            columns = ['"UID"', '"RTavg"']
            for item in self.expr_code:
                columns.append('"%s"' % item)
            fp1.write("|".join(columns) + "\n")   #dk

            for peak_idx in range(len(self.peakpos[0])):

                ias = []
                new_peak_list = []
                avgrt = 0
                countrt = 0

                for align_idx in range(len(self.peakpos)):

                    peak = self.peakpos[align_idx][peak_idx]

                    if peak is None:
                        ias.append(None)
                        continue

                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    # Build the floored pairs on the fly instead of
                    # writing them back, so the dict returned by
                    # get_ion_areas() is left untouched. items() is used
                    # because iteritems() only exists on Python 2 dicts.
                    ia = peak.get_ion_areas()
                    floored = ((mass, math.floor(intensity))
                               for mass, intensity in ia.items())
                    ias.append(sorted(floored,
                                      key=operator.itemgetter(1),
                                      reverse=True))
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1

                # with no peak at this position avgrt stays 0
                if countrt > 0:
                    avgrt = avgrt / countrt

                compo_peak = composite_peak(new_peak_list, minutes)
                peak_UID_string = '"%s"' % compo_peak.get_UID()

                # write to ms file
                fp1.write(peak_UID_string)
                fp1.write("|%.3f" % avgrt)
                for ia in ias:
                    if ia is None:
                        fp1.write("|NA")
                    else:
                        fp1.write("|%s" % ia)
                fp1.write("\n")
Exemplo n.º 9
0
    def write_csv(self, rt_file_name, area_file_name, minutes=True):
        """
        @summary: Writes the alignment to CSV files

        This function writes two files: one containing the alignment of peak
        retention times and the other containing the alignment of peak areas.
        Both files share the same header and the same two leading columns
        (the quoted UID of the composite peak and the mean retention time
        of the peaks present at that position); missing peaks are written
        as NA.

        @param rt_file_name: The name for the retention time alignment file
        @type rt_file_name: StringType
        @param area_file_name: The name for the areas alignment file
        @type area_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds
        @type minutes: BooleanType

        @author: Woon Wai Keen
        @author: Andrew Isaac
        @author: Vladimir Likic
        """

        fp1 = fp2 = None
        try:
            fp1 = open(rt_file_name, "w")
            fp2 = open(area_file_name, "w")
        except IOError:
            # do not leak the first handle when only the second open failed
            if fp1 is not None:
                fp1.close()
            error("Cannot open output file for writing")

        # both files are closed on exit, including when a row fails to build
        with fp1, fp2:
            # create header
            columns = ['"UID"', '"RTavg"']
            columns.extend('"%s"' % item for item in self.expr_code)
            header = ",".join(columns) + "\n"

            # write headers
            fp1.write(header)
            fp2.write(header)

            # for each alignment position write alignment's peak and area
            for peak_idx in range(len(self.peakpos[0])):

                rts = []
                areas = []
                new_peak_list = []
                avgrt = 0
                countrt = 0

                for align_idx in range(len(self.peakpos)):

                    peak = self.peakpos[align_idx][peak_idx]

                    if peak is None:
                        rts.append(None)
                        areas.append(None)
                        continue

                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    rts.append(rt)
                    areas.append(peak.get_area())
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1

                # with no peak at this position avgrt stays 0
                if countrt > 0:
                    avgrt = avgrt / countrt

                compo_peak = composite_peak(new_peak_list, minutes)
                peak_UID_string = '"%s"' % compo_peak.get_UID()

                # the UID / RTavg columns are identical in both files
                leading = [peak_UID_string, "%.3f" % avgrt]

                # write to retention times file
                rt_cells = ["NA" if rt is None else "%.3f" % rt
                            for rt in rts]
                fp1.write(",".join(leading + rt_cells) + "\n")

                # write to peak areas file
                area_cells = ["NA" if area is None else "%.4f" % area
                              for area in areas]
                fp2.write(",".join(leading + area_cells) + "\n")
Exemplo n.º 10
0
    def write_common_ion_csv(self, area_file_name, top_ion_list, minutes=True):
        """
        @summary: Writes the common ion areas of the alignment to a
            CSV file

        This function writes a single file. Each row holds the quoted
        UID of the composite peak, the average retention time of the
        peaks present at that position, the common (quantitation) ion
        taken from top_ion_list and then, for every experiment, the
        area of that ion or NA when no area is available.

        @param area_file_name: The name for the areas alignment file
        @type area_file_name: StringType
        @param top_ion_list: A list of the highest intensity common ion
                             along the aligned peaks
        @type top_ion_list: ListType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds
        @type minutes: BooleanType

        @author: Woon Wai Keen
        @author: Andrew Isaac
        @author: Sean O'Callaghan
        @author: Vladimir Likic
        """

        try:
            fp = open(area_file_name, "w")
        except IOError:
            error("Cannot open output file for writing")

        # the context manager closes the file even if a later step raises
        with fp:
            if top_ion_list is None:
                error("List of common ions must be supplied")

            # create and write header
            columns = ['"UID","RTavg", "Quant Ion"']
            columns.extend('"%s"' % item for item in self.expr_code)
            fp.write(",".join(columns) + "\n")

            rtsums = []
            rtcounts = []

            # The following two arrays will become list of lists
            # such that:
            # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
            #            [align1_peak2, ................................]
            #              .............................................
            #            [align1_peakm,....................,alignn_peakm]  ]
            areas = []
            new_peak_lists = []

            for peak_list in self.peakpos:
                for index, peak in enumerate(peak_list):
                    # on the first iteration, populate the lists
                    if len(areas) < len(peak_list):
                        areas.append([])
                        new_peak_lists.append([])
                        rtsums.append(0)
                        rtcounts.append(0)

                    if peak is None:
                        areas[index].append(None)
                        continue

                    rt = peak.get_rt()

                    # get the area of the common ion for the peak; a
                    # missing area (None) shows that while the peak was
                    # aligned, the common ion was not present
                    area = peak.get_ion_area(top_ion_list[index])

                    areas[index].append(area)
                    new_peak_lists[index].append(peak)

                    # accumulated only to compute the average rt
                    rtsums[index] += rt
                    rtcounts[index] += 1

            # now build the strings for the file
            out_strings = []
            for index, area_list in enumerate(areas):

                # initial info: peak unique id, peak average rt, common ion
                compo_peak = composite_peak(new_peak_lists[index], minutes)
                peak_UID_string = '"%s"' % compo_peak.get_UID()

                rt_avg = rtsums[index] / rtcounts[index]
                # Honour the documented `minutes` flag: the average used
                # to be divided by 60 unconditionally, so minutes=False
                # still produced values divided by 60.
                if minutes:
                    rt_avg = rt_avg / 60

                fields = [peak_UID_string,
                          "%.3f" % rt_avg,
                          "%d" % top_ion_list[index]]
                for area in area_list:
                    fields.append("NA" if area is None else "%.4f" % area)

                out_strings.append(",".join(fields))

            # now write the file
            for row in out_strings:
                fp.write(row + "\n")
Exemplo n.º 11
0
    def write_mass_hunter_csv(self, out_file, top_ion_list):
        """
        @summary: Writes a csv file with ion ratios and UID

        Each row holds the UID of the composite peak, the common ion,
        two qualifying ions each followed by the ratio (in percent) of
        its intensity to the common ion intensity, and the left and
        right retention time window deltas.

        @param out_file: name of the output file
        @type out_file: strType

        @param top_ion_list: a list of the common ions for each
                             peak in the averaged peak list for the
                             alignment
        @type top_ion_list: listType

        @return: nothing is returned; the csv file with UID, common and
                 qualifying ions and their ratios is written to out_file
        @rtype: NoneType
        """

        def intensity_ratio(ion_intensity, ci_intensity):
            # ratio of a qualifying ion intensity to the common ion one
            try:
                return float(ion_intensity) / float(ci_intensity)
            except TypeError:  # if no intensity available for that ion
                return 0.0
            except ZeroDivisionError:  # shouldn't happen but does!!
                return 0.01

        try:
            fp = open(out_file, "w")
        except IOError:
            error("Cannot open output file for writing")

        # the context manager closes the file even if a later step raises
        with fp:
            if top_ion_list is None:
                error("List of common ions must be supplied")

            # create and write header
            header = '"UID","Common Ion", "Qual Ion 1", "ratio QI1/CI", "Qual Ion 2", "ratio QI2/CI", "l window delta", "r window delta"'
            fp.write(header + "\n")

            # The following two arrays will become list of lists
            # such that:
            # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
            #            [align1_peak2, ................................]
            #              .............................................
            #            [align1_peakm,....................,alignn_peakm]  ]
            # `areas` is not written to the output; it is kept so that
            # get_ion_area() is still called for every peak as before.
            areas = []
            new_peak_lists = []
            rtcounts = []
            rtmax = []
            rtmin = []

            for peak_list in self.peakpos:
                for index, peak in enumerate(peak_list):
                    # on the first iteration, populate the lists
                    if len(areas) < len(peak_list):
                        areas.append([])
                        new_peak_lists.append([])
                        rtcounts.append(0)
                        rtmax.append(0.0)
                        rtmin.append(0.0)

                    if peak is None:
                        areas[index].append(None)
                        continue

                    rt = peak.get_rt()

                    # get the area of the common ion for the peak
                    area = peak.get_ion_area(top_ion_list[index])

                    areas[index].append(area)
                    new_peak_lists[index].append(peak)

                    # Track the smallest rt seen at this position. The
                    # first peak seeds the minimum directly; the previous
                    # 5400.0 sentinel gave a wrong minimum whenever every
                    # rt at the position was larger than 5400.
                    if rtcounts[index] == 0 or rt < rtmin[index]:
                        rtmin[index] = rt

                    if rt > rtmax[index]:
                        rtmax[index] = rt

                    rtcounts[index] += 1

            # now build the strings for the file
            out_strings = []
            for index in range(len(areas)):

                compo_peak = composite_peak(new_peak_lists[index],
                                            minutes=False)
                peak_UID = compo_peak.get_UID()

                # time from the leftmost peak to the average, and from
                # the average to the rightmost peak
                l_window_delta = compo_peak.get_rt() - rtmin[index]
                r_window_delta = rtmax[index] - compo_peak.get_rt()

                common_ion = top_ion_list[index]

                # the first two '-' separated fields of the UID are
                # taken as the qualifying ions
                uid_fields = peak_UID.split('-')
                qual_ion_1 = int(uid_fields[0])
                qual_ion_2 = int(uid_fields[1])

                # a qualifying ion must differ from the common ion
                if qual_ion_1 == common_ion:
                    qual_ion_1 = compo_peak.get_third_highest_mz()
                elif qual_ion_2 == common_ion:
                    qual_ion_2 = compo_peak.get_third_highest_mz()

                ci_intensity = compo_peak.get_int_of_ion(common_ion)
                if ci_intensity is None:
                    # single-argument form prints the same text on
                    # Python 2 and Python 3
                    print("No Ci for peak %s" % index)
                q1_intensity = compo_peak.get_int_of_ion(qual_ion_1)
                q2_intensity = compo_peak.get_int_of_ion(qual_ion_2)

                q1_ci_ratio = intensity_ratio(q1_intensity, ci_intensity)
                q2_ci_ratio = intensity_ratio(q2_intensity, ci_intensity)

                # ratios are written as percentages; each window delta
                # is widened by 1.5 and divided by 60 before writing
                out_strings.append(
                    "%s,%s,%s,%.1f,%s,%.1f,%.2f,%.2f" % (
                        peak_UID,
                        common_ion,
                        qual_ion_1,
                        q1_ci_ratio * 100,
                        qual_ion_2,
                        q2_ci_ratio * 100,
                        (l_window_delta + 1.5) / 60,
                        (r_window_delta + 1.5) / 60))

            # now write the file
            for row in out_strings:
                fp.write(row + "\n")
Exemplo n.º 12
0
    def write_transposed_output(self, excel_file_name, minutes=True):
        """
        @summary: Writes the alignment to an excel file with one column
            per alignment position and one row per experiment

        Two sheets are created: 'Aligned RT' holds the retention times
        and 'Aligned Area' the peak areas. Missing peaks are written as
        'NA', and the cells of peaks whose isoutlier attribute is true
        are filled with the outlier colour on both sheets.

        @param excel_file_name: The name of the excel file to write
        @type excel_file_name: StringType
        @param minutes: If True, the per-peak retention times are divided
            by 60 before being written; if False they are written unchanged
        @type minutes: BooleanType
        """
        wb = Workbook()
        ws1 = wb.create_sheet(title='Aligned RT')
        ws2 = wb.create_sheet(title='Aligned Area')

        for ws in (ws1, ws2):
            ws['A1'] = "Peak"
            ws['A2'] = "RTavg"

        style_outlier = PatternFill(fill_type="solid", fgColor="FFAE19",
                                    bgColor="FFAE19")

        # write column with sample IDs
        for i, item in enumerate(self.expr_code):
            ws1.cell(column=1, row=i + 3, value="%s" % item)
            ws2.cell(column=1, row=i + 3, value="%s" % item)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):    # peak lists

            # tuples of form (peak, col, row), only for non-NA peaks
            new_peak_list = []
            cell_col = 2 + peak_idx

            for align_idx in range(len(self.peakpos)):   # samples
                peak = self.peakpos[align_idx][peak_idx]
                cell_row = 3 + align_idx

                if peak is None:
                    currcell1 = ws1.cell(column=cell_col, row=cell_row,
                                         value='NA')
                    ws2.cell(column=cell_col, row=cell_row, value='NA')
                    currcell1.comment = Comment("Area: NA", 'dave')
                    continue

                if minutes:
                    rt = peak.get_rt() / 60.0
                else:
                    rt = peak.get_rt()

                area = peak.get_area()

                # the col,row coords of the peak in the output matrix
                new_peak_list.append((peak, cell_col, cell_row))

                # write the RT and the area into their sheets
                currcell1 = ws1.cell(column=cell_col, row=cell_row,
                                     value=round(rt, 3))
                ws2.cell(column=cell_col, row=cell_row,
                         value=round(area, 3))

                # get the mini-mass spec for this peak, and divide the ion
                # intensities by 1000 to shorten them. The scaled pairs are
                # built on the fly instead of being written back into the
                # dict, so the object returned by get_ion_areas() is never
                # modified. items() is used because iteritems() only
                # exists on Python 2 dicts.
                ia = peak.get_ion_areas()
                sorted_ia = sorted(
                    ((mass, int(intensity / 1000))
                     for mass, intensity in ia.items()),
                    key=operator.itemgetter(1), reverse=True)

                # write the peak area and mass spec into the cell comment
                currcell1.comment = Comment(
                    "Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

            # per the original author's note, composite_peak also marks
            # outlier peaks with a bool isoutlier, which is read below
            compo_peak = composite_peak([p[0] for p in new_peak_list],
                                        minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            # NOTE(review): RTavg is always get_rt()/60 here, whatever the
            # value of `minutes` - confirm this is intended.
            rt_avg_string = "%.3f" % float(compo_peak.get_rt() / 60)
            for ws in (ws1, ws2):
                ws.cell(column=cell_col, row=1, value=peak_UID_string)
                ws.cell(column=cell_col, row=2, value=rt_avg_string)

            # highlight outlier cells in the current peak list
            for peak, col, row in new_peak_list:
                if peak.isoutlier:
                    ws1.cell(column=col, row=row).fill = style_outlier
                    ws2.cell(column=col, row=row).fill = style_outlier

        wb.save(excel_file_name)
Exemplo n.º 13
0
    def write_common_ion_csv(self,
                             area_file_name: Union[str, pathlib.Path],
                             top_ion_list: List,
                             minutes: bool = True):
        """
        Writes the areas of the common ion of each aligned peak to a CSV file.

        One row is written per alignment position. It holds the quoted UID of
        the composite peak, the average retention time of the peaks found at
        that position, the common ("quant") ion taken from ``top_ion_list``,
        and then one area per entry of ``self.peakpos``. ``NA`` is written
        wherever the area is :py:obj:`None`, i.e. there is no peak at that
        position or the ion area lookup returned :py:obj:`None`.

        :param area_file_name: The name for the areas alignment file
        :type area_file_name: str or os.PathLike
        :param top_ion_list: A list of the highest intensity common ion along
            the aligned peaks, one entry per alignment position
        :type top_ion_list: ~collections.abc.Sequence
        :param minutes: An optional indicator whether to save the average
            retention time in minutes. If False, it is saved as stored on the
            peaks (seconds, given that the minutes form divides by 60)
        :type minutes: bool, optional

        :raises TypeError: If ``area_file_name`` is not path-like or
            ``top_ion_list`` is not a sequence of numbers.

        :author: Woon Wai Keen
        :author: Andrew Isaac
        :author: Sean O'Callaghan
        :author: Vladimir Likic
        :author: Dominic Davis-Foster (pathlib support)
        """

        if not is_path(area_file_name):
            raise TypeError(
                "'area_file_name' must be a string or a PathLike object")

        if not is_sequence_of(top_ion_list, Number):
            raise TypeError("'top_ion_list' must be a Sequence of Numbers")

        area_file_name = prepare_filepath(area_file_name)

        with area_file_name.open("w") as fp:

            # create and write the header
            header = ['"UID"', '"RTavg"', '"Quant Ion"']
            header.extend(f'"{item}"' for item in self.expr_code)
            fp.write(",".join(header) + "\n")

            # Per-position accumulators for the average retention time
            rtsums = []
            rtcounts = []

            # The following two arrays will become list of lists
            # such that:
            # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
            #            [align1_peak2, ................................]
            #              .............................................
            #            [align1_peakm,....................,alignn_peakm]  ]
            areas: List[List] = []
            new_peak_lists: List[List[Peak]] = []

            for peak_list in self.peakpos:
                for index, peak in enumerate(peak_list):
                    # on the first pass, grow the per-position lists
                    if len(areas) < len(peak_list):
                        areas.append([])
                        new_peak_lists.append([])
                        rtsums.append(0)
                        rtcounts.append(0)

                    if peak is None:
                        areas[index].append(None)
                        continue

                    rt = peak.rt

                    # area of the common ion for this peak
                    areas[index].append(peak.get_ion_area(top_ion_list[index]))
                    new_peak_lists[index].append(peak)

                    rtsums[index] += rt
                    rtcounts[index] += 1

            # Honour ``minutes``: previously the average was always divided
            # by 60, so minutes=False had no effect.
            rt_divisor = 60 if minutes else 1

            # Build every row before writing, so an error while building
            # leaves only the header in the file (as before).
            rows = []
            for index, area_list in enumerate(areas):
                compo_peak = composite_peak(new_peak_lists[index])
                rt_avg = rtsums[index] / rtcounts[index]

                fields = [
                    f'"{compo_peak.UID}"',
                    f"{rt_avg / rt_divisor:.3f}",
                    f"{top_ion_list[index]:f}",
                ]
                fields.extend(
                    "NA" if area is None else f"{area:.4f}"
                    for area in area_list)
                rows.append(",".join(fields))

            for row in rows:
                fp.write(row + "\n")
Exemplo n.º 14
0
    def write_csv(self, rt_file_name, area_file_name, minutes=True):

        """
        @summary: Writes the alignment to CSV files

        This function writes two files: one containing the alignment of peak
        retention times and the other containing the alignment of peak areas.
        Both files start with a header row ("UID", "RTavg", then one column
        per entry of self.expr_code) followed by one row per alignment
        position. Missing peaks are written as NA.

        @param rt_file_name: The name for the retention time alignment file
        @type rt_file_name: StringType
        @param area_file_name: The name for the areas alignment file
        @type area_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds
        @type minutes: BooleanType

        @author: Woon Wai Keen
        @author: Andrew Isaac
        @author: Vladimir Likic
        """

        # Open the files one at a time so that the first is not leaked when
        # the second cannot be opened.
        try:
            fp1 = open(rt_file_name, "w")
        except IOError:
            error("Cannot open output file for writing")

        try:
            fp2 = open(area_file_name, "w")
        except IOError:
            fp1.close()
            error("Cannot open output file for writing")

        # try/finally guarantees both files are closed even if writing fails
        try:
            # create header
            header_fields = ['"UID"', '"RTavg"']
            for item in self.expr_code:
                header_fields.append('"%s"' % item)
            header = ",".join(header_fields) + "\n"

            # write headers
            fp1.write(header)
            fp2.write(header)

            # for each alignment position write alignment's peak and area
            for peak_idx in range(len(self.peakpos[0])):

                rts = []
                areas = []
                new_peak_list = []
                avgrt = 0
                countrt = 0

                for align_idx in range(len(self.peakpos)):

                    peak = self.peakpos[align_idx][peak_idx]

                    if peak is None:
                        rts.append(None)
                        areas.append(None)
                        continue

                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    rts.append(rt)
                    areas.append(peak.get_area())
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1

                # average over the peaks that are present; stays 0 if none are
                if countrt > 0:
                    avgrt = avgrt / countrt

                compo_peak = composite_peak(new_peak_list, minutes)
                peak_UID = compo_peak.get_UID()
                peak_UID_string = ('"%s"' % peak_UID)

                # write to retention times file
                rt_fields = [peak_UID_string, "%.3f" % avgrt]
                for rt in rts:
                    rt_fields.append("NA" if rt is None else "%.3f" % rt)
                fp1.write(",".join(rt_fields) + "\n")

                # write to peak areas file
                area_fields = [peak_UID_string, "%.3f" % avgrt]
                for area in areas:
                    area_fields.append("NA" if area is None else "%.4f" % area)
                fp2.write(",".join(area_fields) + "\n")
        finally:
            fp1.close()
            fp2.close()
Exemplo n.º 15
0
def _check_alignment_csv(alignment, rt_path, area_path, minutes):
    """
    Assert that the CSV files produced by ``write_csv`` match the alignment.

    :param alignment: The alignment whose ``expr_code`` and ``peakpos`` the
        file contents are compared against.
    :param rt_path: Path of the retention time CSV file.
    :param area_path: Path of the peak area CSV file.
    :param minutes: Whether the retention times are expected in minutes
        (stored value divided by 60) rather than as stored.
    """

    assert rt_path.exists()
    assert area_path.exists()

    # Read via context managers so the file handles are closed promptly
    with rt_path.open() as rt_fp:
        rt_csv = list(csv.reader(rt_fp))
    with area_path.open() as area_fp:
        area_csv = list(csv.reader(area_fp))

    rt_divisor = 60 if minutes else 1

    assert rt_csv[0][0:2] == area_csv[0][0:2] == ["UID", "RTavg"]
    assert rt_csv[0][2:] == area_csv[0][2:] == alignment.expr_code

    # loop through peak lists (rows)
    for peak_idx in range(len(alignment.peakpos[0])):
        row = peak_idx + 1  # row 0 is the header
        new_peak_list = []

        for align_idx, sample_peaks in enumerate(alignment.peakpos):
            peak = sample_peaks[peak_idx]

            if peak is None:
                continue

            col = align_idx + 2  # columns 0 and 1 are UID and RTavg

            if peak.rt is None or numpy.isnan(peak.rt):
                assert rt_csv[row][col] == "NA"
            else:
                assert rt_csv[row][col] == f"{peak.rt / rt_divisor:.3f}"

            if peak.area is None or numpy.isnan(peak.area):
                assert area_csv[row][col] == "NA"
            else:
                assert area_csv[row][col] == f"{peak.area:.0f}"

            new_peak_list.append(peak)

        compo_peak = composite_peak(new_peak_list)
        assert compo_peak is not None

        assert rt_csv[row][0] == area_csv[row][0] == compo_peak.UID

        expected_rt_avg = f"{float(compo_peak.rt / rt_divisor):.3f}"
        assert rt_csv[row][1] == area_csv[row][1] == expected_rt_avg


def test_write_csv(A1, tmp_pathplus):
    """
    Check the output of ``write_csv`` with retention times in minutes
    (the default) and again with ``minutes=False``.
    """

    A1.write_csv(tmp_pathplus / "alignment_rt.csv",
                 tmp_pathplus / "alignment_area.csv")

    # Read alignment_rt.csv and alignment_area.csv and check values
    _check_alignment_csv(
        A1,
        tmp_pathplus / "alignment_rt.csv",
        tmp_pathplus / "alignment_area.csv",
        minutes=True,
    )

    A1.write_csv(
        tmp_pathplus / "alignment_rt_seconds.csv",
        tmp_pathplus / "alignment_area_seconds.csv",
        minutes=False,
    )

    # Read alignment_rt_seconds.csv and alignment_area_seconds.csv and check values
    _check_alignment_csv(
        A1,
        tmp_pathplus / "alignment_rt_seconds.csv",
        tmp_pathplus / "alignment_area_seconds.csv",
        minutes=False,
    )
Exemplo n.º 16
0
    def write_mass_hunter_csv(self, out_file,
                              top_ion_list):  #, peak_list_name):
        """
        @summary: Writes a csv file with UID, common and qualifying
                  ions and their ratios for mass hunter interpretation

        One row is written per alignment position, holding the UID of
        the composite peak, the common ion, two qualifying ions with
        their intensity relative to the common ion (as a percentage),
        and the left and right retention time window deltas: the
        distance from the composite peak's retention time to the
        earliest / latest aligned peak, plus 1.5, divided by 60.

        @param out_file: name of the output file
        @type out_file: strType

        @param top_ion_list: a list of the common ions for each
                             peak in the averaged peak list for the
                             alignment
        @type top_ion_list: listType

        @return: None; the result is written to out_file
        @rtype: NoneType
        """

        # Validate before opening, so that an invalid call does not
        # truncate an existing output file.
        if top_ion_list is None:
            error("List of common ions must be supplied")

        try:
            fp = open(out_file, "w")
        except IOError:
            error("Cannot open output file for writing")

        # try/finally guarantees the file is closed even if a row fails
        try:
            # create and write header
            header = '"UID","Common Ion", "Qual Ion 1", "ratio QI1/CI", "Qual Ion 2", "ratio QI2/CI", "l window delta", "r window delta"'
            fp.write(header + "\n")

            # The following two arrays will become list of lists
            # such that:
            # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
            #            [align1_peak2, ................................]
            #              .............................................
            #            [align1_peakm,....................,alignn_peakm]  ]
            areas = []
            new_peak_lists = []

            for peak_list in self.peakpos:
                for index, peak in enumerate(peak_list):
                    # on the first pass, grow the per-position lists
                    if len(areas) < len(peak_list):
                        areas.append([])
                        new_peak_lists.append([])

                    if peak is not None:
                        # area of the common ion for this peak
                        areas[index].append(
                            peak.get_ion_area(top_ion_list[index]))
                        new_peak_lists[index].append(peak)
                    else:
                        areas[index].append(None)

            out_strings = []
            # now build the strings for the file
            for index, peaks in enumerate(new_peak_lists):

                compo_peak = composite_peak(peaks, minutes=False)
                peak_UID = compo_peak.get_UID()
                peak_UID_string = ('"%s"' % peak_UID)

                # Earliest and latest retention time at this position.
                # Taken directly from the aligned peaks; the former
                # 0.0/5400.0 sentinel gave a wrong minimum for retention
                # times above 5400 or equal to 0. Falls back to 0.0 when
                # there are no peaks, as before.
                rts = [peak.get_rt() for peak in peaks]
                rt_min = min(rts) if rts else 0.0
                rt_max = max(rts) if rts else 0.0

                # time from the leftmost peak to the average, and from
                # the average to the rightmost peak
                l_window_delta = compo_peak.get_rt() - rt_min
                r_window_delta = rt_max - compo_peak.get_rt()

                # the first two '-'-separated fields of the UID are
                # used as the qualifying ions
                common_ion = top_ion_list[index]
                qual_ion_1 = int(peak_UID_string.split('-')[0].strip('"'))
                qual_ion_2 = int(peak_UID_string.split('-')[1])

                # a qualifying ion must differ from the common ion
                if qual_ion_1 == common_ion:
                    qual_ion_1 = compo_peak.get_third_highest_mz()
                elif qual_ion_2 == common_ion:
                    qual_ion_2 = compo_peak.get_third_highest_mz()

                ci_intensity = compo_peak.get_int_of_ion(common_ion)
                if ci_intensity is None:
                    # single-argument form prints the same text on
                    # Python 2 and Python 3
                    print("No Ci for peak %d" % index)
                q1_intensity = compo_peak.get_int_of_ion(qual_ion_1)
                q2_intensity = compo_peak.get_int_of_ion(qual_ion_2)

                try:
                    q1_ci_ratio = float(q1_intensity) / float(ci_intensity)
                except TypeError:  # if no area available for that ion
                    q1_ci_ratio = 0.0
                except ZeroDivisionError:  # shouldn't happen but does!!
                    q1_ci_ratio = 0.01
                try:
                    q2_ci_ratio = float(q2_intensity) / float(ci_intensity)
                except TypeError:
                    q2_ci_ratio = 0.0
                except ZeroDivisionError:  # shouldn't happen, but does!!
                    q2_ci_ratio = 0.01

                out_strings.append(",".join([
                    peak_UID,
                    str(common_ion),
                    str(qual_ion_1),
                    "%.1f" % (q1_ci_ratio * 100),
                    str(qual_ion_2),
                    "%.1f" % (q2_ci_ratio * 100),
                    "%.2f" % ((l_window_delta + 1.5) / 60),
                    "%.2f" % ((r_window_delta + 1.5) / 60),
                ]))

            # now write the file
            for row in out_strings:
                fp.write(row + "\n")
        finally:
            fp.close()
Exemplo n.º 17
0
def write_mass_hunter_csv(
		alignment: Alignment,
		file_name: PathLike,
		top_ion_list: List[int],
		):  # , peak_list_name):
	"""
	Creates a csv file with UID, common and qualifying ions and their
	ratios for mass hunter interpretation.

	One row is written per alignment position for which a composite peak
	could be created. The window delta columns hold the distance from the
	composite peak's retention time to the earliest / latest aligned peak,
	plus 1.5, divided by 60.

	:param alignment: alignment object to write to file
	:param file_name: name of the output file.

	:param top_ion_list: a list of the common ions for each peak in the
		averaged peak list for the alignment.

	:raises TypeError: If ``file_name`` is not a string or path-like object.
	:raises ValueError: If ``top_ion_list`` is :py:obj:`None`.
	"""  # noqa: D400

	if not is_path(file_name):
		raise TypeError("'file_name' must be a string or a PathLike object")

	# Validate before touching the file system, so that an invalid call
	# neither truncates an existing file nor leaves an open handle behind.
	if top_ion_list is None:
		raise ValueError("List of common ions must be supplied")

	file_name = prepare_filepath(file_name)

	# The following two arrays will become list of lists
	# such that:
	# areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
	#            [align1_peak2, ................................]
	#              .............................................
	#            [align1_peakm,....................,alignn_peakm]  ]
	areas = []  # type: ignore
	new_peak_lists = []  # type: ignore

	with file_name.open('w', encoding="UTF-8") as fp:

		# write headers
		fp.write(
				'"UID","Common Ion","Qual Ion 1","ratio QI1/CI","Qual Ion 2",'
				'"ratio QI2/CI","l window delta","r window delta"\n'
				)

		for peak_list in alignment.peakpos:
			for index, peak in enumerate(peak_list):
				# on the first pass, grow the per-position lists
				if len(areas) < len(peak_list):
					areas.append([])
					new_peak_lists.append([])

				if peak is not None:
					# area of the common ion for this peak
					areas[index].append(peak.get_ion_area(top_ion_list[index]))
					new_peak_lists[index].append(peak)
				else:
					areas[index].append(None)

		out_strings = []

		# now build the strings for the file
		# enumerate() keeps the position index in step even when a position
		# is skipped; previously ``continue`` bypassed ``index += 1`` and
		# every following position was lost.
		for index, peaks in enumerate(new_peak_lists):

			compo_peak = composite_peak(peaks)
			if compo_peak is None:
				continue

			peak_UID = compo_peak.UID
			peak_UID_string = f'"{peak_UID}"'

			# Earliest and latest retention time at this position, taken
			# directly from the aligned peaks. The former 0.0/5400.0 sentinel
			# gave a wrong minimum for retention times above 5400 or equal
			# to 0. The 0.0 default matches the old value for an empty list.
			rts = [peak.rt for peak in peaks]
			rt_min = min(rts, default=0.0)
			rt_max = max(rts, default=0.0)

			# time from the leftmost peak to the average, and from the
			# average to the rightmost peak
			l_window_delta = compo_peak.rt - rt_min
			r_window_delta = rt_max - compo_peak.rt

			# the first two '-'-separated fields of the UID are used as the
			# qualifying ions
			common_ion = top_ion_list[index]
			qual_ion_1 = int(peak_UID_string.split('-')[0].strip('"'))
			qual_ion_2 = int(peak_UID_string.split('-')[1])

			# a qualifying ion must differ from the common ion
			if qual_ion_1 == common_ion:
				qual_ion_1 = compo_peak.get_third_highest_mz()
			elif qual_ion_2 == common_ion:
				qual_ion_2 = compo_peak.get_third_highest_mz()

			ci_intensity = compo_peak.get_int_of_ion(common_ion)
			q1_intensity = compo_peak.get_int_of_ion(qual_ion_1)
			q2_intensity = compo_peak.get_int_of_ion(qual_ion_2)

			try:
				q1_ci_ratio = float(q1_intensity) / float(ci_intensity)
			except TypeError:  # if no area available for that ion
				q1_ci_ratio = 0.0
			except ZeroDivisionError:
				# shouldn't happen but does!!
				q1_ci_ratio = 0.01
			try:
				q2_ci_ratio = float(q2_intensity) / float(ci_intensity)
			except TypeError:
				q2_ci_ratio = 0.0
			except ZeroDivisionError:
				# shouldn't happen, but does!!
				q2_ci_ratio = 0.01

			out_strings.append(
					','.join([
							peak_UID,
							f"{common_ion}",
							f"{qual_ion_1}",
							f"{q1_ci_ratio * 100:.1f}",
							f"{qual_ion_2}",
							f"{q2_ci_ratio * 100:.1f}",
							f"{(l_window_delta + 1.5) / 60:.2f}",
							f"{(r_window_delta + 1.5) / 60:.2f}",
							])
					)

		# now write the file
		for row in out_strings:
			fp.write(f"{row}\n")
Exemplo n.º 18
0
def write_transposed_output(
		alignment: Alignment,
		file_name: PathLike,
		minutes: bool = True,
		):
	"""
	Writes the alignment to an excel file in transposed form, with one row
	per sample and one column per aligned peak.

	Two sheets are written: "Aligned RT" holds the retention time of each
	peak and "Aligned Area" holds its area. In both sheets row 1 holds the
	quoted UID of the composite peak, row 2 its retention time, and column 1
	the entries of ``alignment.expr_code``. Missing peaks are written as
	``NA``. Every cell of the RT sheet carries a comment with the peak area
	and, for present peaks, the ion areas (divided by 1000, sorted by
	decreasing value). Cells of peaks whose ``is_outlier`` attribute is true
	are filled with a solid colour on both sheets.

	:param alignment: :class:`pyms.DPA.Alignment.Alignment` object to write to file
	:param file_name: The name of the file
	:param minutes: Whether to save retention times in minutes (stored value
		divided by 60). If :py:obj:`False` they are saved as stored.

	:raises TypeError: If ``file_name`` is not a string or path-like object.
	"""

	if not is_path(file_name):
		raise TypeError("'file_name' must be a string or a PathLike object")

	file_name = prepare_filepath(file_name)

	wb = Workbook()
	ws1 = wb.create_sheet(title="Aligned RT")
	ws2 = wb.create_sheet(title="Aligned Area")

	ws1["A1"] = "Peak"
	ws1["A2"] = "RTavg"

	ws2["A1"] = "Peak"
	ws2["A2"] = "RTavg"

	style_outlier = PatternFill(fill_type="solid", fgColor="FFAE19", bgColor="FFAE19")

	# write column with sample IDs
	for i, item in enumerate(alignment.expr_code):
		ws1.cell(column=1, row=i + 3, value=f"{item}")
		ws2.cell(column=1, row=i + 3, value=f"{item}")

	# for each alignment position write alignment's peak and area
	for peak_idx in range(len(alignment.peakpos[0])):  # loop through peak lists

		# tuples of form (peak, col, row), only for non-missing peaks
		new_peak_list = []
		cell_col = 2 + peak_idx

		for align_idx, sample_peaks in enumerate(alignment.peakpos):  # loops through samples
			peak = sample_peaks[peak_idx]
			cell_row = 3 + align_idx

			if peak is None:
				currcell1 = ws1.cell(column=cell_col, row=cell_row, value="NA")
				ws2.cell(column=cell_col, row=cell_row, value="NA")
				currcell1.comment = Comment("Area: NA", "dave")
				continue

			if minutes:
				rt = peak.rt / 60.0
			else:
				rt = peak.rt

			area = peak.area

			# these are the col,row coords of the peak in the output matrix
			new_peak_list.append((peak, cell_col, cell_row))

			# write the RT and the area into their cells
			currcell1 = ws1.cell(column=cell_col, row=cell_row, value=round(rt, 3))
			ws2.cell(column=cell_col, row=cell_row, value=round(area, 3))  # type: ignore

			# Get the mini-mass spec for this peak with the ion intensities
			# divided by 1000 to shorten them. A scaled copy is built instead
			# of updating ``peak.ion_areas`` in place, so that writing the
			# file cannot alter the peak's own data (NOTE(review): only
			# matters if ``ion_areas`` returns the underlying dict - confirm).
			scaled_ia = {mass: int(intensity / 1000) for mass, intensity in peak.ion_areas.items()}
			sorted_ia = sorted(scaled_ia.items(), key=operator.itemgetter(1), reverse=True)

			# write the peak area and mass spec into the comment for the cell
			currcell1.comment = Comment(f"Area: {area:.0f} | MassSpec: {sorted_ia}", "dave")

		# Create the composite peak from the peaks found at this position.
		# The highlighting below relies on each peak's ``is_outlier``
		# attribute; presumably composite_peak sets it - verify.
		compo_peak = composite_peak([p[0] for p in new_peak_list])

		if compo_peak is None:
			continue

		# Use the same units for RTavg as for the per-sample cells; it was
		# previously always written in minutes, even with minutes=False.
		rt_avg = compo_peak.rt / 60 if minutes else compo_peak.rt
		rt_avg_string = f"{float(rt_avg):.3f}"
		uid_string = f'"{compo_peak.UID}"'

		ws1.cell(column=cell_col, row=1, value=uid_string)
		ws1.cell(column=cell_col, row=2, value=rt_avg_string)
		ws2.cell(column=cell_col, row=1, value=uid_string)
		ws2.cell(column=cell_col, row=2, value=rt_avg_string)

		# highlight outlier cells in the current peak list
		for outlier_peak, col, row in new_peak_list:
			if outlier_peak.is_outlier:
				ws1.cell(column=col, row=row).fill = style_outlier
				ws2.cell(column=col, row=row).fill = style_outlier

	wb.save(file_name)
Exemplo n.º 19
0
def write_excel(
		alignment: Alignment,
		file_name: PathLike,
		minutes: bool = True,
		):
	"""
	Writes the alignment to an excel file, with colouring showing possible mis-alignments.

	The sheet has one row per alignment position and one column per experiment.
	Each cell holds the peak retention time; the peak area and a shortened
	mass spectrum (ion areas divided by 1000) are attached as a cell comment.
	Missing peaks are written as ``"NA"``.

	:param alignment: :class:`pyms.DPA.Alignment.Alignment` object to write to file.
	:param file_name: The name for the retention time alignment file.
	:param minutes: Whether to save retention times in minutes.
		If :py:obj:`False`, retention time will be saved in seconds.
		The ``RTavg`` column is always written as ``compo_peak.rt / 60``,
		regardless of this flag.

	:raises TypeError: If ``file_name`` is rejected by ``is_path``.

	:author: David Kainer
	"""

	if not is_path(file_name):
		raise TypeError("'file_name' must be a string or a PathLike object")

	file_name = prepare_filepath(file_name)

	wb = Workbook()
	ws = wb.active
	ws.title = "Aligned RT"

	# create header row
	ws["A1"] = "UID"
	ws["B1"] = "RTavg"
	for i, item in enumerate(alignment.expr_code):
		currcell = ws.cell(row=1, column=i + 3, value=f"{item}")
		currcell.comment = Comment("sample " + str(i), "dave")

	# for each alignment position write alignment's peak and area
	for peak_idx in range(len(alignment.peakpos[0])):  # loop through peak lists (rows)
		row_number = 2 + peak_idx
		new_peak_list = []

		for align_idx, sample_peaks in enumerate(alignment.peakpos):  # loops through samples (columns)
			peak = sample_peaks[peak_idx]
			column_number = 3 + align_idx

			if peak is None:
				currcell = ws.cell(row=row_number, column=column_number, value="NA")
				currcell.comment = Comment("Area: NA", "dave")
				continue

			rt = peak.rt / 60.0 if minutes else peak.rt
			area = peak.area
			new_peak_list.append(peak)

			# write the RT into the cell in the excel file
			currcell = ws.cell(row=row_number, column=column_number, value=round(rt, 3))

			# Build the mini-mass spec for this peak with the ion intensities divided
			# by 1000 to shorten them. A new dict is created so the mapping returned
			# by ``peak.ion_areas`` is never modified, whether or not it is a copy.
			ia = {mass: int(intensity / 1000) for mass, intensity in peak.ion_areas.items()}
			sorted_ia = sorted(ia.items(), key=operator.itemgetter(1), reverse=True)

			# write the peak area and mass spec into the comment for the cell
			currcell.comment = Comment(f"Area: {area:.0f} | MassSpec: {sorted_ia}", "dave")

		compo_peak = composite_peak(new_peak_list)

		if compo_peak is not None:
			ws.cell(row=row_number, column=1, value=f'"{compo_peak.UID}"')
			ws.cell(row=row_number, column=2, value=f"{float(compo_peak.rt / 60):.3f}")

	# colour the cells in each row based on their RT percentile for that row
	first_column = get_column_letter(3)
	for row_number, row in enumerate(ws.rows, start=1):
		cell_range = f"{first_column}{row_number}:{get_column_letter(len(row))}{row_number}"
		ws.conditional_formatting.add(
				cell_range,
				ColorScaleRule(
						start_type="percentile",
						start_value=1,
						start_color="E5FFCC",
						mid_type="percentile",
						mid_value=50,
						mid_color="FFFFFF",
						end_type="percentile",
						end_value=99,
						end_color="FFE5CC"
						),
				)

	# Save once, after every row has been formatted (previously this ran inside
	# the loop above and rewrote the whole file for each row).
	wb.save(file_name)
Exemplo n.º 20
0
    def write_ion_areas_csv(self,
                            ms_file_name: Union[str, pathlib.Path],
                            minutes: bool = True):
        """
        Write the ion areas of the aligned peaks to a ``|``-delimited file.

        The first line is a header (``"UID"``, ``"RTavg"``, then one quoted
        column per entry of ``self.expr_code``). Each following line holds
        the composite peak UID, its retention time, and for every experiment
        the list of ``(mass, floor(area))`` pairs sorted by decreasing area,
        or ``NA`` where the experiment has no peak at that position.

        :param ms_file_name: The name of the file
        :type ms_file_name: str, PathLike
        :param minutes: If True the composite peak retention time is divided
            by 60 before being written; otherwise it is written unchanged.
        :type minutes: bool

        :raises TypeError: If ``ms_file_name`` is rejected by ``is_path``.

        :author: David Kainer
        :author: Dominic Davis-Foster (pathlib support)
        """

        if not is_path(ms_file_name):
            raise TypeError(
                "'ms_file_name' must be a string or a PathLike object")

        ms_file_name = prepare_filepath(ms_file_name)

        with ms_file_name.open("w") as fp1:

            # create and write the header
            header = ['"UID"', '"RTavg"']
            header.extend(f'"{item}"' for item in self.expr_code)
            fp1.write("|".join(header) + "\n")

            for peak_idx in range(len(self.peakpos[0])):

                ias = []
                new_peak_list = []

                for sample_peaks in self.peakpos:
                    peak = sample_peaks[peak_idx]

                    if peak is None:
                        # Keep a placeholder so the columns stay aligned
                        # with the header; it is written as "NA" below.
                        ias.append(None)
                        continue

                    # Floor into a new dict so the mapping returned by
                    # ``peak.ion_areas`` is never modified.
                    ia = {
                        mass: math.floor(intensity)
                        for mass, intensity in peak.ion_areas.items()
                    }
                    ias.append(
                        sorted(ia.items(),
                               key=operator.itemgetter(1),
                               reverse=True))
                    new_peak_list.append(peak)

                compo_peak = composite_peak(new_peak_list)

                if compo_peak is None:
                    # No composite peak could be built for this position
                    # (presumably because every experiment is missing it),
                    # so there is nothing to report -- skip the row.
                    continue

                rt = compo_peak.rt / 60 if minutes else compo_peak.rt

                fields = [compo_peak.UID, f"{rt:.3f}"]
                fields.extend("NA" if ia is None else f"{ia}" for ia in ias)

                fp1.write("|".join(fields) + "\n")
Exemplo n.º 21
0
    def write_csv_dk(self, rt_file_name, area_file_name, minutes=True):
        """
        @summary: Writes the alignment to CSV files, but excludes outliers from the calculation of composite peak

        This function writes two files: one containing the alignment of peak
        retention times and the other containing the alignment of peak areas.
        Both files share the same header and the same first two columns
        (composite peak UID and composite retention time divided by 60).
        Missing peaks are written as "NA".

        @param rt_file_name: The name for the retention time alignment file
        @type rt_file_name: StringType
        @param area_file_name: The name for the areas alignment file
        @type area_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds.
            Only the per-sample retention times honour this flag; the RTavg
            column is always get_rt() / 60
        @type minutes: BooleanType

        @author: David Kainer
        """

        try:
            fp1 = open(rt_file_name, "w")
        except IOError:
            error("Cannot open output file for writing")

        try:
            fp2 = open(area_file_name, "w")
        except IOError:
            # do not leak the first handle when the second file cannot be opened
            fp1.close()
            error("Cannot open output file for writing")

        # the context managers close both files even if writing fails part-way
        with fp1, fp2:

            # create and write the header
            columns = ['"UID"', '"RTavg"']
            columns.extend('"%s"' % item for item in self.expr_code)
            header = ",".join(columns) + "\n"

            fp1.write(header)
            fp2.write(header)

            # for each alignment position write alignment's peak and area
            for peak_idx in range(len(self.peakpos[0])):  # rows

                rts = []
                areas = []
                new_peak_list = []

                for sample_peaks in self.peakpos:  # samples (columns)
                    peak = sample_peaks[peak_idx]

                    if peak is None:
                        rts.append(numpy.nan)
                        areas.append(None)
                        continue

                    rt = peak.get_rt()
                    rts.append(rt / 60.0 if minutes else rt)
                    areas.append(peak.get_area())
                    new_peak_list.append(peak)

                compo_peak = composite_peak(new_peak_list, minutes)

                if compo_peak is None:
                    # Assumed: composite_peak yields None when it has no peaks
                    # to combine (TODO confirm). Such a row has no data.
                    continue

                row_start = ('"%s"' % compo_peak.get_UID()) + \
                    (",%.3f" % float(compo_peak.get_rt() / 60))

                # write to retention times file
                rt_fields = [
                    ",NA" if numpy.isnan(rt) else ",%.3f" % rt for rt in rts
                ]
                fp1.write(row_start + "".join(rt_fields) + "\n")

                # write to peak areas file
                area_fields = [
                    ",NA" if area is None else ",%.0f" % area
                    for area in areas
                ]
                fp2.write(row_start + "".join(area_fields) + "\n")
Exemplo n.º 22
0
    def write_common_ion_csv(self, area_file_name, top_ion_list, minutes=True):

        """
        @summary: Writes the areas of the common ion of each aligned peak
            to a CSV file

        Each row holds the composite peak UID, the average retention time of
        the aligned peaks divided by 60, the common (quant) ion for that
        position, and then the area of that ion in every experiment ("NA"
        where the peak is missing or get_ion_area() gave None).

        @param area_file_name: The name for the areas alignment file
        @type area_file_name: StringType
        @param top_ion_list: A list of the highest intensity common ion
                             along the aligned peaks
        @type top_ion_list: ListType
        @param minutes: Passed on to composite_peak(); the RTavg column is
            always the mean of get_rt() divided by 60
        @type minutes: BooleanType

        @author: Woon Wai Keen
        @author: Andrew Isaac
        @author: Sean O'Callaghan
        @author: Vladimir Likic
        """

        # validate the arguments before touching the file system, so a bad
        # call does not create/truncate the output file or leak a handle
        if top_ion_list is None:
            error("List of common ions must be supplied")

        try:
            fp = open(area_file_name, "w")
        except IOError:
            error("Cannot open output file for writing")

        # the context manager closes the file even if an error occurs below
        with fp:

            # create and write the header
            header = '"UID","RTavg", "Quant Ion"'
            for item in self.expr_code:
                header = header + "," + ('"%s"' % item)
            fp.write(header + "\n")

            rtsums = []
            rtcounts = []

            # The following two arrays will become list of lists
            # such that:
            # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
            #            [align1_peak2, ................................]
            #              .............................................
            #            [align1_peakm,....................,alignn_peakm]  ]
            areas = []
            new_peak_lists = []

            for peak_list in self.peakpos:
                for index, peak in enumerate(peak_list):
                    # on the first iteration, populate the lists
                    if len(areas) < len(peak_list):
                        areas.append([])
                        new_peak_lists.append([])
                        rtsums.append(0)
                        rtcounts.append(0)

                    if peak is None:
                        areas[index].append(None)
                        continue

                    # get the area of the common ion for the peak;
                    # a None area is reported as "NA" (per the original
                    # note: the peak was aligned but lacks the common ion)
                    areas[index].append(
                        peak.get_ion_area(top_ion_list[index]))
                    new_peak_lists[index].append(peak)

                    # accumulate for the average retention time
                    rtsums[index] += peak.get_rt()
                    rtcounts[index] += 1

            # now build the rows for the file
            out_strings = []
            for index, area_list in enumerate(areas):

                if rtcounts[index] == 0:
                    # no experiment has a peak here: there is no composite
                    # peak and no average RT to report (previously this
                    # position raised instead of being skipped)
                    continue

                # peak unique id, peak average rt, quant ion
                compo_peak = composite_peak(new_peak_lists[index], minutes)
                peak_UID_string = '"%s"' % compo_peak.get_UID()
                rt_avg = rtsums[index] / rtcounts[index]

                fields = [
                    peak_UID_string,
                    "%.3f" % (rt_avg / 60),
                    "%d" % top_ion_list[index],
                ]
                for area in area_list:
                    fields.append("NA" if area is None else "%.4f" % area)

                out_strings.append(",".join(fields))

            # now write the file
            for row in out_strings:
                fp.write(row + "\n")
Exemplo n.º 23
0
    def write_transposed_output(self, excel_file_name, minutes=True):
        """
        @summary: Writes the alignment to an excel file in transposed layout
            (one column per aligned peak, one row per sample), filling the
            cells of peaks flagged as outliers

        Two sheets are created, "Aligned RT" and "Aligned Area". Row 1 holds
        the composite peak UID, row 2 the composite retention time divided
        by 60, and the rows from 3 onwards hold one sample each. Missing
        peaks are written as 'NA'. On the RT sheet each cell also carries a
        comment with the peak area and a shortened mass spectrum (ion areas
        divided by 1000).

        @param excel_file_name: The name for the output excel file
        @type excel_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds.
            Only the per-sample retention times honour this flag
        @type minutes: BooleanType
        """
        wb = Workbook()
        ws1 = wb.create_sheet(title='Aligned RT')
        ws2 = wb.create_sheet(title='Aligned Area')

        ws1['A1'] = "Peak"
        ws1['A2'] = "RTavg"

        ws2['A1'] = "Peak"
        ws2['A2'] = "RTavg"

        style_outlier = PatternFill(fill_type="solid",
                                    fgColor="FFAE19",
                                    bgColor="FFAE19")

        # write column with sample IDs
        for i, item in enumerate(self.expr_code):
            ws1.cell(column=1, row=i + 3, value="%s" % item)
            ws2.cell(column=1, row=i + 3, value="%s" % item)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists

            # list of tuples of form (peak, col, row), but only non-NA peaks
            new_peak_list = []
            cell_col = 2 + peak_idx

            for align_idx, sample_peaks in enumerate(self.peakpos):  # samples
                peak = sample_peaks[peak_idx]
                cell_row = 3 + align_idx

                if peak is None:
                    currcell1 = ws1.cell(column=cell_col,
                                         row=cell_row,
                                         value='NA')
                    ws2.cell(column=cell_col, row=cell_row, value='NA')
                    currcell1.comment = Comment("Area: NA", 'dave')
                    continue

                rt = peak.get_rt()
                if minutes:
                    rt = rt / 60.0

                area = peak.get_area()

                # these are the col,row coords of the peak in the output matrix
                new_peak_list.append((peak, cell_col, cell_row))

                # write the RT and the area into their sheets
                currcell1 = ws1.cell(column=cell_col,
                                     row=cell_row,
                                     value=round(rt, 3))
                ws2.cell(column=cell_col, row=cell_row, value=round(area, 3))

                # Build the mini-mass spec for this peak with the ion
                # intensities divided by 1000 to shorten them. A new dict is
                # used so the mapping returned by get_ion_areas() is never
                # modified, and items() (not the Python-2-only iteritems())
                # keeps this working on Python 3.
                ia = dict((mass, int(intensity / 1000))
                          for mass, intensity in peak.get_ion_areas().items())
                sorted_ia = sorted(ia.items(),
                                   key=operator.itemgetter(1),
                                   reverse=True)

                # write the peak area and mass spec into the comment for the cell
                currcell1.comment = Comment(
                    "Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

            # per the original note, this creates the compo peak and also
            # marks outlier peaks with a bool isoutlier
            compo_peak = composite_peak([p[0] for p in new_peak_list],
                                        minutes)

            if compo_peak is None:
                # Assumed: composite_peak yields None when there are no peaks
                # to combine (TODO confirm); leave the column header blank.
                continue

            # JT: removed nested "" to make it easier to work with R
            peak_UID_string = "%s" % compo_peak.get_UID()
            rt_avg_string = "%.3f" % float(compo_peak.get_rt() / 60)

            for ws in (ws1, ws2):
                ws.cell(column=cell_col, row=1, value=peak_UID_string)
                ws.cell(column=cell_col, row=2, value=rt_avg_string)

            # highlight outlier cells in the current peak list
            for peak, col, row in new_peak_list:
                if peak.isoutlier:
                    ws1.cell(column=col, row=row).fill = style_outlier
                    ws2.cell(column=col, row=row).fill = style_outlier

        wb.save(excel_file_name)
Exemplo n.º 24
0
    def write_excel(self, excel_file_name, minutes=True):
        """
        @summary: Writes the alignment to an excel file, with colouring showing possible mis-alignments

        The sheet has one row per alignment position and one column per
        sample. Each cell holds the peak retention time, with the peak area
        and a shortened mass spectrum (ion areas divided by 1000) attached
        as a cell comment. Missing peaks are written as 'NA'.

        @param excel_file_name: The name for the retention time alignment file
        @type excel_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds.
            Only the per-sample retention times honour this flag; the RTavg
            column is always get_rt() / 60
        @type minutes: BooleanType

        @author: David Kainer
        """
        wb = Workbook()
        ws = wb.active
        ws.title = "Aligned RT"

        # create header row
        ws['A1'] = "UID"
        ws['B1'] = "RTavg"
        for i, item in enumerate(self.expr_code):
            currcell = ws.cell(row=1, column=i + 3, value="%s" % item)
            currcell.comment = Comment('sample ' + str(i), 'dave')

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):  # rows
            row_number = 2 + peak_idx
            new_peak_list = []

            for align_idx, sample_peaks in enumerate(self.peakpos):  # columns
                peak = sample_peaks[peak_idx]
                column_number = 3 + align_idx

                if peak is None:
                    currcell = ws.cell(row=row_number,
                                       column=column_number,
                                       value='NA')
                    currcell.comment = Comment("Area: NA", 'dave')
                    continue

                rt = peak.get_rt()
                if minutes:
                    rt = rt / 60.0

                area = peak.get_area()
                new_peak_list.append(peak)

                # write the RT into the cell in the excel file
                currcell = ws.cell(row=row_number,
                                   column=column_number,
                                   value=round(rt, 3))

                # Build the mini-mass spec for this peak with the ion
                # intensities divided by 1000 to shorten them. A new dict is
                # used so the mapping returned by get_ion_areas() is never
                # modified, and items() (not the Python-2-only iteritems())
                # keeps this working on Python 3.
                ia = dict((mass, int(intensity / 1000))
                          for mass, intensity in peak.get_ion_areas().items())
                sorted_ia = sorted(ia.items(),
                                   key=operator.itemgetter(1),
                                   reverse=True)

                # write the peak area and mass spec into the comment for the cell
                currcell.comment = Comment(
                    "Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

            compo_peak = composite_peak(new_peak_list, minutes)

            if compo_peak is None:
                # Assumed: composite_peak yields None when there are no peaks
                # to combine (TODO confirm); leave the UID/RTavg cells blank.
                continue

            ws.cell(row=row_number,
                    column=1,
                    value='"%s"' % compo_peak.get_UID())
            ws.cell(row=row_number,
                    column=2,
                    value="%.3f" % float(compo_peak.get_rt() / 60))

        # colour the cells in each row based on their RT percentile for that row
        first_column = utils.get_column_letter(3)
        for i, row in enumerate(ws.rows, 1):
            cell_range = "{0}{2}:{1}{2}".format(
                first_column, utils.get_column_letter(len(row)), i)
            ws.conditional_formatting.add(
                cell_range,
                ColorScaleRule(start_type='percentile',
                               start_value=1,
                               start_color='E5FFCC',
                               mid_type='percentile',
                               mid_value=50,
                               mid_color='FFFFFF',
                               end_type='percentile',
                               end_value=99,
                               end_color='FFE5CC'))
        wb.save(excel_file_name)
Exemplo n.º 25
0
    def write_csv_dk(self, rt_file_name, area_file_name, minutes=True):

        """
        @summary: Writes the alignment to CSV files, but excludes outliers from the calculation of composite peak

        This function writes two files: one containing the alignment of peak
        retention times and the other containing the alignment of peak areas.
        Both files share the same header and the same first two columns
        (composite peak UID and composite retention time divided by 60).
        Missing peaks are written as "NA".

        @param rt_file_name: The name for the retention time alignment file
        @type rt_file_name: StringType
        @param area_file_name: The name for the areas alignment file
        @type area_file_name: StringType
        @param minutes: An optional indicator whether to save retention times
            in minutes. If False, retention time will be saved in seconds.
            Only the per-sample retention times honour this flag; the RTavg
            column is always get_rt() / 60
        @type minutes: BooleanType

        @author: David Kainer
        """

        try:
            fp1 = open(rt_file_name, "w")
        except IOError:
            error("Cannot open output file for writing")

        try:
            fp2 = open(area_file_name, "w")
        except IOError:
            # do not leak the first handle when the second file cannot be opened
            fp1.close()
            error("Cannot open output file for writing")

        # the context managers close both files even if writing fails part-way
        with fp1, fp2:

            # create and write the header
            columns = ['"UID"', '"RTavg"']
            columns.extend('"%s"' % item for item in self.expr_code)
            header = ",".join(columns) + "\n"

            fp1.write(header)
            fp2.write(header)

            # for each alignment position write alignment's peak and area
            for peak_idx in range(len(self.peakpos[0])):    # rows

                rts = []
                areas = []
                new_peak_list = []

                for sample_peaks in self.peakpos:   # samples (columns)
                    peak = sample_peaks[peak_idx]

                    if peak is None:
                        rts.append(numpy.nan)
                        areas.append(None)
                        continue

                    rt = peak.get_rt()
                    rts.append(rt / 60.0 if minutes else rt)
                    areas.append(peak.get_area())
                    new_peak_list.append(peak)

                compo_peak = composite_peak(new_peak_list, minutes)

                if compo_peak is None:
                    # Assumed: composite_peak yields None when it has no peaks
                    # to combine (TODO confirm). Such a row has no data.
                    continue

                row_start = ('"%s"' % compo_peak.get_UID()) + \
                    (",%.3f" % float(compo_peak.get_rt() / 60))

                # write to retention times file
                rt_fields = [
                    ",NA" if numpy.isnan(rt) else ",%.3f" % rt for rt in rts
                ]
                fp1.write(row_start + "".join(rt_fields) + "\n")

                # write to peak areas file
                area_fields = [
                    ",NA" if area is None else ",%.0f" % area
                    for area in areas
                ]
                fp2.write(row_start + "".join(area_fields) + "\n")