def aligned_peaks(self, minutes=False):
    """
    @summary: Returns a list of Peak objects where each peak has the
        combined spectra and average retention time of all peaks that
        aligned.

    One composite peak is built per alignment position from the peaks
    that are present (not None) at that position across self.peakpos.

    @param minutes: An optional indicator of whether retention times are
        in minutes. If False, retention time are in seconds. The flag is
        forwarded unchanged to composite_peak()
    @type minutes: BooleanType

    @return: A list of composite peaks based on the alignment.
    @rtype: ListType

    @author: Andrew Isaac
    """

    # the first aligned list defines how many positions there are
    n_positions = len(self.peakpos[0])
    composites = []

    for position in range(n_positions):
        # gather the peaks at this position, ignoring missing ones
        present = [
            aligned[position]
            for aligned in self.peakpos
            if aligned[position] is not None
        ]
        composites.append(composite_peak(present, minutes))

    return composites
def aligned_peaks(self, minutes: bool = False) -> List:
    """
    Returns a list of Peak objects where each peak has the combined spectra
    and average retention time of all peaks that aligned.

    One composite peak is built per alignment position from the peaks that
    are present (not ``None``) at that position across ``self.peakpos``.

    :param minutes: An optional indicator of whether retention times are in
        minutes. If False, retention time are in seconds.
        The argument is accepted but not used by this implementation.
    :type minutes: bool, optional

    :return: A list of composite peaks based on the alignment.
    :rtype: list

    :author: Andrew Isaac
    """

    # TODO: minutes currently does nothing

    # the first aligned list defines how many positions there are
    n_positions = len(self.peakpos[0])
    composites = []

    for position in range(n_positions):
        # gather the peaks at this position, ignoring missing ones
        present = [
            aligned[position]
            for aligned in self.peakpos
            if aligned[position] is not None
        ]
        composites.append(composite_peak(present))

    return composites
def test_write_ion_areas_csv(A1, tmp_pathplus):
    """
    Check the ``|``-delimited output of ``write_ion_areas_csv``, written once
    with the default arguments and once with ``minutes=False``.
    """

    minutes_path = tmp_pathplus / "alignment_ion_areas.csv"
    seconds_path = tmp_pathplus / "alignment_ion_areas_seconds.csv"

    A1.write_ion_areas_csv(minutes_path)
    A1.write_ion_areas_csv(seconds_path, minutes=False)

    # Read alignment_ion_areas.csv and check values
    assert minutes_path.exists()

    # Read inside context managers so the handles are closed deterministically
    # instead of being left for the garbage collector.
    with minutes_path.open() as fp:
        ion_csv = list(csv.reader(fp, delimiter='|'))
    with seconds_path.open() as fp:
        seconds_ion_csv = list(csv.reader(fp, delimiter='|'))

    assert ion_csv[0][0:2] == seconds_ion_csv[0][0:2] == ["UID", "RTavg"]
    assert ion_csv[0][2:] == seconds_ion_csv[0][2:] == A1.expr_code

    for peak_idx in range(len(A1.peakpos[0])):  # loop through peak lists (rows)

        new_peak_list = []

        for align_idx in range(len(A1.peakpos)):
            peak = A1.peakpos[align_idx][peak_idx]

            if peak is not None:
                # Rebuild the expected cell text: floored ion areas,
                # sorted by area in descending order.
                ia = peak.ion_areas
                ia.update((mass, math.floor(intensity)) for mass, intensity in ia.items())
                sorted_ia = sorted(ia.items(), key=operator.itemgetter(1), reverse=True)

                # Row 0 is the header and columns 0-1 are UID and RTavg
                assert ion_csv[peak_idx + 1][align_idx + 2] == str(sorted_ia)
                assert seconds_ion_csv[peak_idx + 1][align_idx + 2] == str(sorted_ia)

                new_peak_list.append(peak)

        compo_peak = composite_peak(new_peak_list)
        assert compo_peak is not None

        assert ion_csv[peak_idx + 1][0] == seconds_ion_csv[peak_idx + 1][0] == compo_peak.UID

        # Default output holds RTavg in minutes; minutes=False holds seconds
        assert ion_csv[peak_idx + 1][1] == f"{float(compo_peak.rt / 60):.3f}"
        assert seconds_ion_csv[peak_idx + 1][1] == f"{float(compo_peak.rt):.3f}"
def write_excel(self, excel_file_name, minutes=True):
    """
    @summary: Writes the alignment to an excel file, with colouring
        showing possible mis-alignments

    One row is written per alignment position and one column per entry
    of self.expr_code. Each cell holds the peak's retention time (or
    'NA' when the peak is missing) and carries a comment with the peak
    area and its ion areas divided by 1000.

    @param excel_file_name: The name for the retention time alignment file
    @type excel_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds
    @type minutes: BooleanType

    @author: David Kainer
    """

    wb = Workbook()
    ws = wb.active
    ws.title = "Aligned RT"

    # create header row
    ws['A1'] = "UID"
    ws['B1'] = "RTavg"
    for i, item in enumerate(self.expr_code):
        currcell = ws.cell(row=1, column=i + 3, value="%s" % item)
        currcell.comment = Comment('sample ' + str(i), 'dave')

    # for each alignment position write alignment's peak and area
    for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists (rows)

        new_peak_list = []

        for align_idx in range(len(self.peakpos)):  # loops through samples (columns)
            peak = self.peakpos[align_idx][peak_idx]

            if peak is not None:
                if minutes:
                    rt = peak.get_rt() / 60.0
                else:
                    rt = peak.get_rt()

                area = peak.get_area()
                new_peak_list.append(peak)

                # write the RT into the cell in the excel file
                currcell = ws.cell(row=2 + peak_idx, column=3 + align_idx, value=round(rt, 3))

                # get the mini-mass spec for this peak, and divide the ion
                # intensities by 1000 to shorten them. The scaled values go
                # into a new dict: get_ion_areas() may hand back the peak's
                # own dict (not visible here), and rescaling that in place
                # would shrink the stored areas on every export.
                scaled_ia = dict(
                    (mass, int(intensity / 1000))
                    for mass, intensity in peak.get_ion_areas().items()
                )
                # .items() works on both Python 2 and 3 (iteritems() is 2-only)
                sorted_ia = sorted(scaled_ia.items(), key=operator.itemgetter(1), reverse=True)

                # write the peak area and mass spec into the comment for the cell
                currcell.comment = Comment("Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

            else:
                currcell = ws.cell(row=2 + peak_idx, column=3 + align_idx, value='NA')
                currcell.comment = Comment("Area: NA", 'dave')

        compo_peak = composite_peak(new_peak_list, minutes)
        peak_UID_string = '"%s"' % compo_peak.get_UID()

        ws.cell(row=2 + peak_idx, column=1, value=peak_UID_string)
        # NOTE(review): RTavg is divided by 60 regardless of `minutes`. Left
        # unchanged because how composite_peak() treats `minutes` is not
        # visible from this block -- confirm before altering.
        ws.cell(row=2 + peak_idx, column=2, value="%.3f" % float(compo_peak.get_rt() / 60))

    # colour the cells in each row based on their RT percentile for that row
    first_letter = utils.get_column_letter(3)
    for row_number, row in enumerate(ws.rows, 1):
        last_letter = utils.get_column_letter(len(row))
        cell_range = "%s%d:%s%d" % (first_letter, row_number, last_letter, row_number)
        ws.conditional_formatting.add(
            cell_range,
            ColorScaleRule(
                start_type='percentile', start_value=1, start_color='E5FFCC',
                mid_type='percentile', mid_value=50, mid_color='FFFFFF',
                end_type='percentile', end_value=99, end_color='FFE5CC',
            ),
        )

    wb.save(excel_file_name)
def write_csv(
        self,
        rt_file_name: Union[str, pathlib.Path],
        area_file_name: Union[str, pathlib.Path],
        minutes: bool = True,
        ):
    """
    Writes the alignment to CSV files

    This function writes two files: one containing the alignment of peak
    retention times and the other containing the alignment of peak areas.

    Each file has a header row (``UID``, ``RTavg``, then one quoted column
    per entry of ``self.expr_code``) followed by one row per alignment
    position. Missing peaks, and NaN values, are written as ``NA``.

    :param rt_file_name: The name for the retention time alignment file
    :type rt_file_name: str or pathlib.Path
    :param area_file_name: The name for the areas alignment file
    :type area_file_name: str or pathlib.Path
    :param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds
    :type minutes: bool, optional

    :raises TypeError: if either file name is not a string or a
        :class:`pathlib.Path`.

    :author: Woon Wai Keen
    :author: Andrew Isaac
    :author: Vladimir Likic
    :author: David Kainer
    :author: Dominic Davis-Foster (pathlib support)
    """

    if not isinstance(rt_file_name, (str, pathlib.Path)):
        raise TypeError("'rt_file_name' must be a string or a pathlib.Path object")

    if not isinstance(area_file_name, (str, pathlib.Path)):
        raise TypeError("'area_file_name' must be a string or a pathlib.Path object")

    # prepare_filepath() is defined outside this block; its result is used
    # below as a path object providing .open()
    rt_file_name = prepare_filepath(rt_file_name)
    area_file_name = prepare_filepath(area_file_name)

    # The context manager closes both files even if a row fails to format
    with rt_file_name.open("w") as fp1, area_file_name.open("w") as fp2:

        # create and write the (shared) header
        header = ['UID', 'RTavg'] + [f'"{item}"' for item in self.expr_code]
        header_line = ",".join(header) + "\n"
        fp1.write(header_line)
        fp2.write(header_line)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists (rows)

            rts = []
            areas = []
            new_peak_list = []

            for align_idx in range(len(self.peakpos)):
                peak = self.peakpos[align_idx][peak_idx]

                if peak is not None:
                    rts.append(peak.rt / 60.0 if minutes else peak.rt)
                    areas.append(peak.area)
                    new_peak_list.append(peak)
                else:
                    rts.append(None)
                    areas.append(None)

            compo_peak = composite_peak(new_peak_list)

            # The UID / RTavg prefix is identical in both files
            if minutes:
                row_prefix = f"{compo_peak.UID},{float(compo_peak.rt / 60):.3f}"
            else:
                row_prefix = f"{compo_peak.UID},{compo_peak.rt:.3f}"

            # write to retention times file
            fp1.write(row_prefix)
            for rt in rts:
                if rt is None or numpy.isnan(rt):
                    fp1.write(",NA")
                else:
                    fp1.write(f",{rt:.3f}")
            fp1.write("\n")

            # write to peak areas file
            fp2.write(row_prefix)
            for area in areas:
                # NaN areas are reported as NA, like NaN retention times
                if area is None or numpy.isnan(area):
                    fp2.write(",NA")
                else:
                    fp2.write(f",{area:.0f}")
            fp2.write("\n")
def write_ion_areas_csv(self, ms_file_name, minutes=True):
    """
    @summary: Writes the ion areas of the aligned peaks to a
        '|'-delimited file

    The file has a header row ("UID", "RTavg", then one quoted column per
    entry of self.expr_code) followed by one row per alignment position.
    Each peak cell holds the peak's (mass, area) pairs, with areas
    floored and sorted by area in descending order; missing peaks are
    written as NA. RTavg is the mean retention time of the peaks present
    at that position.

    @param ms_file_name: The name of the output file
    @type ms_file_name: StringType
    @param minutes: If True the averaged retention time is written in
        minutes (peak retention time divided by 60), otherwise as
        returned by the peak
    @type minutes: BooleanType
    """

    try:
        fp1 = open(ms_file_name, "w")  # dk
    except IOError:
        # error() is defined outside this block; presumably it raises
        error("Cannot open output file for writing")

    # 'with' guarantees the file is closed even if a row fails part-way
    with fp1:
        # create and write the header
        header_fields = ['"UID"', '"RTavg"'] + ['"%s"' % item for item in self.expr_code]
        fp1.write("|".join(header_fields) + "\n")  # dk

        for peak_idx in range(len(self.peakpos[0])):

            ias = []
            new_peak_list = []
            avgrt = 0
            countrt = 0

            for align_idx in range(len(self.peakpos)):
                peak = self.peakpos[align_idx][peak_idx]

                if peak is not None:
                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    ia = peak.get_ion_areas()
                    ia.update((mass, math.floor(intensity)) for mass, intensity in ia.items())
                    # .items() works on both Python 2 and 3 (iteritems() is 2-only)
                    sorted_ia = sorted(ia.items(), key=operator.itemgetter(1), reverse=True)
                    ias.append(sorted_ia)
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1
                else:
                    ias.append(None)

            if countrt > 0:
                avgrt = avgrt / countrt

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            # write to ms file
            fp1.write(peak_UID_string)
            fp1.write("|%.3f" % avgrt)
            for ia in ias:
                if ia is None:
                    fp1.write("|NA")
                else:
                    fp1.write("|%s" % ia)
            fp1.write("\n")
def write_ion_areas_csv(self, ms_file_name, minutes=True):
    """
    @summary: Writes the ion areas of the aligned peaks to a
        '|'-delimited file

    The file has a header row ("UID", "RTavg", then one quoted column per
    entry of self.expr_code) followed by one row per alignment position.
    Each peak cell holds the peak's (mass, area) pairs, with areas
    floored and sorted by area in descending order; missing peaks are
    written as NA. RTavg is the mean retention time of the peaks present
    at that position.

    @param ms_file_name: The name of the output file
    @type ms_file_name: StringType
    @param minutes: If True the averaged retention time is written in
        minutes (peak retention time divided by 60), otherwise as
        returned by the peak
    @type minutes: BooleanType
    """

    try:
        fp1 = open(ms_file_name, "w")  # dk
    except IOError:
        # error() is defined outside this block; presumably it raises
        error("Cannot open output file for writing")

    # 'with' guarantees the file is closed even if a row fails part-way
    with fp1:
        # create and write the header
        header_fields = ['"UID"', '"RTavg"'] + ['"%s"' % item for item in self.expr_code]
        fp1.write("|".join(header_fields) + "\n")  # dk

        for peak_idx in range(len(self.peakpos[0])):

            ias = []
            new_peak_list = []
            avgrt = 0
            countrt = 0

            for align_idx in range(len(self.peakpos)):
                peak = self.peakpos[align_idx][peak_idx]

                if peak is not None:
                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    ia = peak.get_ion_areas()
                    ia.update((mass, math.floor(intensity)) for mass, intensity in ia.items())
                    # .items() works on both Python 2 and 3 (iteritems() is 2-only)
                    sorted_ia = sorted(ia.items(), key=operator.itemgetter(1), reverse=True)
                    ias.append(sorted_ia)
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1
                else:
                    ias.append(None)

            if countrt > 0:
                avgrt = avgrt / countrt

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            # write to ms file
            fp1.write(peak_UID_string)
            fp1.write("|%.3f" % avgrt)
            for ia in ias:
                if ia is None:
                    fp1.write("|NA")
                else:
                    fp1.write("|%s" % ia)
            fp1.write("\n")
def write_csv(self, rt_file_name, area_file_name, minutes=True):
    """
    @summary: Writes the alignment to CSV files

    This function writes two files: one containing the alignment of peak
    retention times and the other containing the alignment of peak areas.

    Both files share a header row ("UID", "RTavg", then one quoted column
    per entry of self.expr_code) and hold one row per alignment position.
    Missing peaks are written as NA. RTavg is the mean retention time of
    the peaks present at that position.

    @param rt_file_name: The name for the retention time alignment file
    @type rt_file_name: StringType
    @param area_file_name: The name for the areas alignment file
    @type area_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds
    @type minutes: BooleanType

    @author: Woon Wai Keen
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # error() is defined outside this block; presumably it raises
    try:
        fp1 = open(rt_file_name, "w")
    except IOError:
        error("Cannot open output file for writing")

    try:
        fp2 = open(area_file_name, "w")
    except IOError:
        # do not leak the first handle when the second file cannot be opened
        fp1.close()
        error("Cannot open output file for writing")

    # 'with' guarantees both files are closed even if a row fails part-way
    with fp1, fp2:
        # create and write the (shared) header
        header_fields = ['"UID"', '"RTavg"'] + ['"%s"' % item for item in self.expr_code]
        header = ",".join(header_fields) + "\n"
        fp1.write(header)
        fp2.write(header)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):

            rts = []
            areas = []
            new_peak_list = []
            avgrt = 0
            countrt = 0

            for align_idx in range(len(self.peakpos)):
                peak = self.peakpos[align_idx][peak_idx]

                if peak is not None:
                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    rts.append(rt)
                    areas.append(peak.get_area())
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1
                else:
                    rts.append(None)
                    areas.append(None)

            if countrt > 0:
                avgrt = avgrt / countrt

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            # write to retention times file
            fp1.write(peak_UID_string)
            fp1.write(",%.3f" % avgrt)
            for rt in rts:
                if rt is None:
                    fp1.write(",NA")
                else:
                    fp1.write(",%.3f" % rt)
            fp1.write("\n")

            # write to peak areas file
            fp2.write(peak_UID_string)
            fp2.write(",%.3f" % avgrt)
            for area in areas:
                if area is None:
                    fp2.write(",NA")
                else:
                    fp2.write(",%.4f" % area)
            fp2.write("\n")
def write_common_ion_csv(self, area_file_name, top_ion_list, minutes=True):
    """
    @summary: Writes the areas of the common ion of each aligned peak to
        a CSV file

    The file has a header row ("UID", "RTavg", "Quant Ion", then one
    quoted column per entry of self.expr_code) followed by one row per
    alignment position. Each peak cell holds the area of that position's
    common ion, or NA when the peak is missing or the ion area is None.

    @param area_file_name: The name for the areas alignment file
    @type area_file_name: StringType
    @param top_ion_list: A list of the highest intensity common ion along
        the aligned peaks
    @type top_ion_list: ListType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds
    @type minutes: BooleanType

    @author: Woon Wai Keen
    @author: Andrew Isaac
    @author: Sean O'Callaghan
    @author: Vladimir Likic
    """

    # Validate before opening, so a bad call does not create or truncate
    # the output file. error() is defined outside this block; presumably
    # it raises.
    if top_ion_list is None:
        error("List of common ions must be supplied")

    try:
        fp = open(area_file_name, "w")
    except IOError:
        error("Cannot open output file for writing")

    # 'with' guarantees the file is closed even if a row fails part-way
    with fp:
        # create and write the header
        header = '"UID","RTavg", "Quant Ion"'
        for item in self.expr_code:
            header = header + "," + ('"%s"' % item)
        fp.write(header + "\n")

        rtsums = []
        rtcounts = []

        # The following two arrays will become list of lists
        # such that:
        # areas = [ [align1_peak1, align2_peak1, .....,alignn_peak1]
        #           [align1_peak2, ................................]
        #           .............................................
        #           [align1_peakm,....................,alignn_peakm] ]
        areas = []
        new_peak_lists = []

        for peak_list in self.peakpos:
            for index, peak in enumerate(peak_list):
                # on the first iteration, populate the lists
                if len(areas) < len(peak_list):
                    areas.append([])
                    new_peak_lists.append([])
                    rtsums.append(0)
                    rtcounts.append(0)

                if peak is not None:
                    rt = peak.get_rt()

                    # get the area of the common ion for the peak
                    area = peak.get_ion_area(top_ion_list[index])

                    areas[index].append(area)
                    new_peak_lists[index].append(peak)

                    # accumulate for the average rt of this position
                    rtsums[index] += rt
                    rtcounts[index] += 1
                else:
                    areas[index].append(None)

        # Build every row first, then write them, so a failure while
        # building leaves only the header in the file.
        out_strings = []
        for index, area_list in enumerate(areas):
            # peak unique id, peak average rt, quant ion
            compo_peak = composite_peak(new_peak_lists[index], minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            rt_avg = rtsums[index] / rtcounts[index]
            # Honour `minutes`: previously the average was always divided
            # by 60, so minutes=False still produced minutes.
            if minutes:
                rt_avg = rt_avg / 60

            row = peak_UID_string + (",%.3f" % rt_avg) + (",%d" % top_ion_list[index])

            for area in area_list:
                if area is not None:
                    row += ",%.4f" % area
                else:
                    row += ",NA"

            out_strings.append(row)

        for row in out_strings:
            fp.write(row + "\n")
def write_mass_hunter_csv(self, out_file, top_ion_list):  # , peak_list_name):
    """
    @summary: Writes a csv file with ion ratios and UID

    One row is written per alignment position, holding the composite
    peak's UID, the common ion, two qualifying ions with their intensity
    ratios to the common ion (as percentages), and the left and right
    retention time window deltas.

    @param out_file: name of the output file
    @type out_file: strType
    @param top_ion_list: a list of the common ions for each peak in the
        averaged peak list for the alignment
    @type top_ion_list: listType

    @return: a csv file with UID, common and qualifying ions and their
        ratios for mass hunter interpretation (written to out_file; the
        function itself returns nothing)
    @rtype: fileType
    """

    # Validate before opening, so a bad call does not create or truncate
    # the output file. error() is defined outside this block; presumably
    # it raises.
    if top_ion_list is None:
        error("List of common ions must be supplied")

    try:
        fp = open(out_file, "w")
    except IOError:
        error("Cannot open output file for writing")

    def ratio_to_common(ion_intensity, common_intensity):
        # Ratio of a qualifying ion's intensity to the common ion's
        try:
            return float(ion_intensity) / float(common_intensity)
        except TypeError:  # if no area available for that ion
            return 0.0
        except ZeroDivisionError:  # shouldn't happen but does!!
            return 0.01

    # 'with' guarantees the file is closed even if a row fails part-way
    with fp:
        # write headers
        header = '"UID","Common Ion", "Qual Ion 1", "ratio QI1/CI", "Qual Ion 2", "ratio QI2/CI", "l window delta", "r window delta"'
        fp.write(header + "\n")

        # One entry per alignment position:
        # new_peak_lists[i] holds the peaks present at position i, and
        # rtmax[i] / rtmin[i] the latest / earliest retention time there.
        new_peak_lists = []
        rtmax = []
        rtmin = []

        for peak_list in self.peakpos:
            for index, peak in enumerate(peak_list):
                # on the first iteration, populate the lists
                if len(new_peak_lists) < len(peak_list):
                    new_peak_lists.append([])
                    rtmax.append(0.0)
                    # None marks "no peak seen yet". This replaces the old
                    # 0.0 / 5400.0 sentinel, which capped rtmin at 5400 s.
                    rtmin.append(None)

                if peak is not None:
                    rt = peak.get_rt()
                    new_peak_lists[index].append(peak)

                    if rt > rtmax[index]:
                        rtmax[index] = rt
                    if rtmin[index] is None or rt < rtmin[index]:
                        rtmin[index] = rt

        # Build every row first, then write them, so a failure while
        # building leaves only the header in the file.
        out_strings = []
        for index, position_peaks in enumerate(new_peak_lists):
            compo_peak = composite_peak(position_peaks, minutes=False)
            peak_UID = compo_peak.get_UID()
            peak_UID_string = '"%s"' % peak_UID

            # positions without any peak keep the historical 0.0 minimum
            earliest_rt = rtmin[index] if rtmin[index] is not None else 0.0

            # calculate the time from the leftmost peak to the average
            l_window_delta = compo_peak.get_rt() - earliest_rt
            r_window_delta = rtmax[index] - compo_peak.get_rt()

            common_ion = top_ion_list[index]

            # The UID's first two '-'-separated fields are read as the two
            # qualifying ions.
            uid_fields = peak_UID_string.split('-')
            qual_ion_1 = int(uid_fields[0].strip('"'))
            qual_ion_2 = int(uid_fields[1])

            # A qualifying ion must differ from the common ion
            if qual_ion_1 == common_ion:
                qual_ion_1 = compo_peak.get_third_highest_mz()
            elif qual_ion_2 == common_ion:
                qual_ion_2 = compo_peak.get_third_highest_mz()

            ci_intensity = compo_peak.get_int_of_ion(common_ion)
            if ci_intensity is None:
                # single-argument form prints identically on Python 2 and 3
                print("No Ci for peak %d" % index)

            q1_ci_ratio = ratio_to_common(compo_peak.get_int_of_ion(qual_ion_1), ci_intensity)
            q2_ci_ratio = ratio_to_common(compo_peak.get_int_of_ion(qual_ion_2), ci_intensity)

            # NOTE(review): the 1.5 added to each window delta before the
            # conversion to minutes is kept as-is; its rationale is not
            # documented in this block.
            out_strings.append(
                peak_UID + ',' + str(common_ion) + ','
                + str(qual_ion_1)
                + (",%.1f" % (q1_ci_ratio * 100))
                + ',' + str(qual_ion_2)
                + (",%.1f" % (q2_ci_ratio * 100))
                + (",%.2f" % ((l_window_delta + 1.5) / 60))
                + (",%.2f" % ((r_window_delta + 1.5) / 60))
            )

        for row in out_strings:
            fp.write(row + "\n")
def write_transposed_output(self, excel_file_name, minutes=True):
    """
    @summary: Writes the alignment to an excel file with one column per
        alignment position and one row per entry of self.expr_code

    Two sheets are created: 'Aligned RT' with retention times and
    'Aligned Area' with peak areas. Missing peaks are written as 'NA'.
    Cells of peaks whose isoutlier attribute is true are filled with a
    highlight colour on both sheets.

    @param excel_file_name: The name of the excel file to save
    @type excel_file_name: StringType
    @param minutes: If True the per-peak retention times are written in
        minutes (divided by 60), otherwise as returned by the peak
    @type minutes: BooleanType
    """

    wb = Workbook()
    ws1 = wb.create_sheet(title='Aligned RT')
    ws2 = wb.create_sheet(title='Aligned Area')

    ws1['A1'] = "Peak"
    ws1['A2'] = "RTavg"

    ws2['A1'] = "Peak"
    ws2['A2'] = "RTavg"

    style_outlier = PatternFill(fill_type="solid", fgColor="FFAE19", bgColor="FFAE19")

    # write column with sample IDs
    for i, item in enumerate(self.expr_code):
        ws1.cell(column=1, row=i + 3, value="%s" % item)
        ws2.cell(column=1, row=i + 3, value="%s" % item)

    # for each alignment position write alignment's peak and area
    for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists

        # list of tuples of form (peak, col, row), but only non-NA peaks
        new_peak_list = []
        cell_col = 2 + peak_idx

        for align_idx in range(len(self.peakpos)):  # loops through samples
            peak = self.peakpos[align_idx][peak_idx]
            cell_row = 3 + align_idx

            if peak is not None:
                if minutes:
                    rt = peak.get_rt() / 60.0
                else:
                    rt = peak.get_rt()

                area = peak.get_area()

                # these are the col,row coords of the peak in the output matrix
                new_peak_list.append((peak, cell_col, cell_row))

                # write the RT and area into the cells in the excel file
                currcell1 = ws1.cell(column=cell_col, row=cell_row, value=round(rt, 3))
                ws2.cell(column=cell_col, row=cell_row, value=round(area, 3))

                # get the mini-mass spec for this peak, and divide the ion
                # intensities by 1000 to shorten them. The scaled values go
                # into a new dict: get_ion_areas() may hand back the peak's
                # own dict (not visible here), and rescaling that in place
                # would shrink the stored areas on every export.
                scaled_ia = dict(
                    (mass, int(intensity / 1000))
                    for mass, intensity in peak.get_ion_areas().items()
                )
                # .items() works on both Python 2 and 3 (iteritems() is 2-only)
                sorted_ia = sorted(scaled_ia.items(), key=operator.itemgetter(1), reverse=True)

                # write the peak area and mass spec into the comment for the cell
                currcell1.comment = Comment("Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

            else:
                currcell1 = ws1.cell(column=cell_col, row=cell_row, value='NA')
                ws2.cell(column=cell_col, row=cell_row, value='NA')
                currcell1.comment = Comment("Area: NA", 'dave')

        # this method will create the compo peak, and also mark outlier
        # peaks with a bool isoutlier
        compo_peak = composite_peak(list(p[0] for p in new_peak_list), minutes)
        peak_UID_string = '"%s"' % compo_peak.get_UID()

        # NOTE(review): RTavg is divided by 60 regardless of `minutes`. Left
        # unchanged because how composite_peak() treats `minutes` is not
        # visible from this block -- confirm before altering.
        rt_avg_string = "%.3f" % float(compo_peak.get_rt() / 60)

        for sheet in (ws1, ws2):
            sheet.cell(column=cell_col, row=1, value=peak_UID_string)
            sheet.cell(column=cell_col, row=2, value=rt_avg_string)

        # highlight outlier cells in the current peak list
        for outlier_peak, col, row in new_peak_list:
            if outlier_peak.isoutlier:
                ws1.cell(column=col, row=row).fill = style_outlier
                ws2.cell(column=col, row=row).fill = style_outlier

    wb.save(excel_file_name)
def write_common_ion_csv(
        self,
        area_file_name: Union[str, pathlib.Path],
        top_ion_list: List,
        minutes: bool = True,
        ):
    """
    Writes the areas of the common ion of each aligned peak to a CSV file.

    The file has a header row (``"UID"``, ``"RTavg"``, ``"Quant Ion"``, then
    one quoted column per entry of ``self.expr_code``) followed by one row
    per alignment position. Each peak cell holds the area of that position's
    common ion, or ``NA`` when the peak is missing or the ion area is ``None``.

    :param area_file_name: The name for the areas alignment file
    :type area_file_name: str or os.PathLike
    :param top_ion_list: A list of the highest intensity common ion along
        the aligned peaks
    :type top_ion_list: ~collections.abc.Sequence
    :param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds
    :type minutes: bool, optional

    :raises TypeError: if ``area_file_name`` is not path-like or
        ``top_ion_list`` is not a sequence of numbers.

    :author: Woon Wai Keen
    :author: Andrew Isaac
    :author: Sean O'Callaghan
    :author: Vladimir Likic
    :author: Dominic Davis-Foster (pathlib support)
    """

    if not is_path(area_file_name):
        raise TypeError("'area_file_name' must be a string or a PathLike object")

    if not is_sequence_of(top_ion_list, Number):
        raise TypeError("'top_ion_list' must be a Sequence of Numbers")

    area_file_name = prepare_filepath(area_file_name)

    with area_file_name.open("w") as fp:

        # create and write the header
        header = ['"UID"', '"RTavg"', '"Quant Ion"']
        header.extend(f'"{item}"' for item in self.expr_code)
        fp.write(",".join(header) + "\n")

        rtsums = []
        rtcounts = []

        # The following two arrays will become list of lists
        # such that:
        # areas = [ [align1_peak1, align2_peak1, .....,alignn_peak1]
        #           [align1_peak2, ................................]
        #           .............................................
        #           [align1_peakm,....................,alignn_peakm] ]
        areas: List[List] = []
        new_peak_lists: List[List[Peak]] = []

        for peak_list in self.peakpos:
            for index, peak in enumerate(peak_list):
                # on the first iteration, populate the lists
                if len(areas) < len(peak_list):
                    areas.append([])
                    new_peak_lists.append([])
                    rtsums.append(0)
                    rtcounts.append(0)

                if peak is not None:
                    rt = peak.rt

                    # get the area of the common ion for the peak
                    area = peak.get_ion_area(top_ion_list[index])

                    areas[index].append(area)
                    new_peak_lists[index].append(peak)

                    # accumulate for the average rt of this position
                    rtsums[index] += rt
                    rtcounts[index] += 1
                else:
                    areas[index].append(None)

        # Build every row first, then write them, so a failure while
        # building leaves only the header in the file.
        out_strings = []
        for index, area_list in enumerate(areas):
            # peak unique id, peak average rt, quant ion
            compo_peak = composite_peak(new_peak_lists[index])
            peak_UID_string = f'"{compo_peak.UID}"'

            rt_avg = rtsums[index] / rtcounts[index]
            # Honour `minutes`: previously the average was always divided
            # by 60, so minutes=False still produced minutes.
            if minutes:
                rt_avg = rt_avg / 60

            fields = [f"{peak_UID_string},{rt_avg:.3f},{top_ion_list[index]:f}"]
            for area in area_list:
                fields.append(f"{area:.4f}" if area is not None else "NA")

            out_strings.append(",".join(fields))

        for row in out_strings:
            fp.write(row + "\n")
def write_csv(self, rt_file_name, area_file_name, minutes=True):
    """
    @summary: Writes the alignment to CSV files

    This function writes two files: one containing the alignment of peak
    retention times and the other containing the alignment of peak areas.

    Both files share a header row ("UID", "RTavg", then one quoted column
    per entry of self.expr_code) and hold one row per alignment position.
    Missing peaks are written as NA. RTavg is the mean retention time of
    the peaks present at that position.

    @param rt_file_name: The name for the retention time alignment file
    @type rt_file_name: StringType
    @param area_file_name: The name for the areas alignment file
    @type area_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds
    @type minutes: BooleanType

    @author: Woon Wai Keen
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    # error() is defined outside this block; presumably it raises
    try:
        fp1 = open(rt_file_name, "w")
    except IOError:
        error("Cannot open output file for writing")

    try:
        fp2 = open(area_file_name, "w")
    except IOError:
        # do not leak the first handle when the second file cannot be opened
        fp1.close()
        error("Cannot open output file for writing")

    # 'with' guarantees both files are closed even if a row fails part-way
    with fp1, fp2:
        # create and write the (shared) header
        header_fields = ['"UID"', '"RTavg"'] + ['"%s"' % item for item in self.expr_code]
        header = ",".join(header_fields) + "\n"
        fp1.write(header)
        fp2.write(header)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):

            rts = []
            areas = []
            new_peak_list = []
            avgrt = 0
            countrt = 0

            for align_idx in range(len(self.peakpos)):
                peak = self.peakpos[align_idx][peak_idx]

                if peak is not None:
                    if minutes:
                        rt = peak.get_rt() / 60.0
                    else:
                        rt = peak.get_rt()

                    rts.append(rt)
                    areas.append(peak.get_area())
                    new_peak_list.append(peak)

                    avgrt = avgrt + rt
                    countrt = countrt + 1
                else:
                    rts.append(None)
                    areas.append(None)

            if countrt > 0:
                avgrt = avgrt / countrt

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            # write to retention times file
            fp1.write(peak_UID_string)
            fp1.write(",%.3f" % avgrt)
            for rt in rts:
                if rt is None:
                    fp1.write(",NA")
                else:
                    fp1.write(",%.3f" % rt)
            fp1.write("\n")

            # write to peak areas file
            fp2.write(peak_UID_string)
            fp2.write(",%.3f" % avgrt)
            for area in areas:
                if area is None:
                    fp2.write(",NA")
                else:
                    fp2.write(",%.4f" % area)
            fp2.write("\n")
def test_write_csv(A1, tmp_pathplus):
    """
    Check the retention time and area CSV files produced by ``write_csv``,
    first with the default arguments (minutes) and then with ``minutes=False``.
    """

    def read_rows(path):
        # Read a CSV file into a list of rows, closing the handle afterwards
        with path.open() as fp:
            return list(csv.reader(fp))

    def check_files(rt_path, area_path, rt_divisor):
        # Compare both files against the alignment; file retention times
        # are expected to equal the peak's rt divided by rt_divisor.
        assert rt_path.exists()
        assert area_path.exists()

        rt_csv = read_rows(rt_path)
        area_csv = read_rows(area_path)

        assert rt_csv[0][0:2] == area_csv[0][0:2] == ["UID", "RTavg"]
        assert rt_csv[0][2:] == area_csv[0][2:] == A1.expr_code

        for peak_idx in range(len(A1.peakpos[0])):  # loop through peak lists (rows)

            new_peak_list = []

            for align_idx in range(len(A1.peakpos)):
                peak = A1.peakpos[align_idx][peak_idx]

                if peak is not None:
                    # Row 0 is the header and columns 0-1 are UID and RTavg
                    if peak.rt is None or numpy.isnan(peak.rt):
                        assert rt_csv[peak_idx + 1][align_idx + 2] == "NA"
                    else:
                        assert rt_csv[peak_idx + 1][align_idx + 2] == f"{peak.rt / rt_divisor:.3f}"

                    if peak.area is None or numpy.isnan(peak.area):
                        assert area_csv[peak_idx + 1][align_idx + 2] == "NA"
                    else:
                        assert area_csv[peak_idx + 1][align_idx + 2] == f"{peak.area:.0f}"

                    new_peak_list.append(peak)

            compo_peak = composite_peak(new_peak_list)
            assert compo_peak is not None

            assert rt_csv[peak_idx + 1][0] == area_csv[peak_idx + 1][0] == compo_peak.UID
            assert (
                rt_csv[peak_idx + 1][1]
                == area_csv[peak_idx + 1][1]
                == f"{float(compo_peak.rt / rt_divisor):.3f}"
            )

    # Default arguments: retention times in minutes
    A1.write_csv(tmp_pathplus / "alignment_rt.csv", tmp_pathplus / "alignment_area.csv")
    check_files(tmp_pathplus / "alignment_rt.csv", tmp_pathplus / "alignment_area.csv", 60)

    # minutes=False: retention times in seconds
    A1.write_csv(
        tmp_pathplus / "alignment_rt_seconds.csv",
        tmp_pathplus / "alignment_area_seconds.csv",
        minutes=False,
    )
    check_files(
        tmp_pathplus / "alignment_rt_seconds.csv",
        tmp_pathplus / "alignment_area_seconds.csv",
        1,
    )
def write_mass_hunter_csv(self, out_file, top_ion_list):  # , peak_list_name):
    """
    @summary: Writes a csv file with ion ratios and UID

    One row is written per alignment position, holding the composite
    peak's UID, the common ion, two qualifying ions with their intensity
    ratios to the common ion (as percentages), and the left and right
    retention time window deltas.

    @param out_file: name of the output file
    @type out_file: strType
    @param top_ion_list: a list of the common ions for each peak in the
        averaged peak list for the alignment
    @type top_ion_list: listType

    @return: a csv file with UID, common and qualifying ions and their
        ratios for mass hunter interpretation (written to out_file; the
        function itself returns nothing)
    @rtype: fileType
    """

    # Validate before opening, so a bad call does not create or truncate
    # the output file. error() is defined outside this block; presumably
    # it raises.
    if top_ion_list is None:
        error("List of common ions must be supplied")

    try:
        fp = open(out_file, "w")
    except IOError:
        error("Cannot open output file for writing")

    def ratio_to_common(ion_intensity, common_intensity):
        # Ratio of a qualifying ion's intensity to the common ion's
        try:
            return float(ion_intensity) / float(common_intensity)
        except TypeError:  # if no area available for that ion
            return 0.0
        except ZeroDivisionError:  # shouldn't happen but does!!
            return 0.01

    # 'with' guarantees the file is closed even if a row fails part-way
    with fp:
        # write headers
        header = '"UID","Common Ion", "Qual Ion 1", "ratio QI1/CI", "Qual Ion 2", "ratio QI2/CI", "l window delta", "r window delta"'
        fp.write(header + "\n")

        # One entry per alignment position:
        # new_peak_lists[i] holds the peaks present at position i, and
        # rtmax[i] / rtmin[i] the latest / earliest retention time there.
        new_peak_lists = []
        rtmax = []
        rtmin = []

        for peak_list in self.peakpos:
            for index, peak in enumerate(peak_list):
                # on the first iteration, populate the lists
                if len(new_peak_lists) < len(peak_list):
                    new_peak_lists.append([])
                    rtmax.append(0.0)
                    # None marks "no peak seen yet". This replaces the old
                    # 0.0 / 5400.0 sentinel, which capped rtmin at 5400 s.
                    rtmin.append(None)

                if peak is not None:
                    rt = peak.get_rt()
                    new_peak_lists[index].append(peak)

                    if rt > rtmax[index]:
                        rtmax[index] = rt
                    if rtmin[index] is None or rt < rtmin[index]:
                        rtmin[index] = rt

        # Build every row first, then write them, so a failure while
        # building leaves only the header in the file.
        out_strings = []
        for index, position_peaks in enumerate(new_peak_lists):
            compo_peak = composite_peak(position_peaks, minutes=False)
            peak_UID = compo_peak.get_UID()
            peak_UID_string = '"%s"' % peak_UID

            # positions without any peak keep the historical 0.0 minimum
            earliest_rt = rtmin[index] if rtmin[index] is not None else 0.0

            # calculate the time from the leftmost peak to the average
            l_window_delta = compo_peak.get_rt() - earliest_rt
            r_window_delta = rtmax[index] - compo_peak.get_rt()

            common_ion = top_ion_list[index]

            # The UID's first two '-'-separated fields are read as the two
            # qualifying ions.
            uid_fields = peak_UID_string.split('-')
            qual_ion_1 = int(uid_fields[0].strip('"'))
            qual_ion_2 = int(uid_fields[1])

            # A qualifying ion must differ from the common ion
            if qual_ion_1 == common_ion:
                qual_ion_1 = compo_peak.get_third_highest_mz()
            elif qual_ion_2 == common_ion:
                qual_ion_2 = compo_peak.get_third_highest_mz()

            ci_intensity = compo_peak.get_int_of_ion(common_ion)
            if ci_intensity is None:
                # single-argument form prints identically on Python 2 and 3
                print("No Ci for peak %d" % index)

            q1_ci_ratio = ratio_to_common(compo_peak.get_int_of_ion(qual_ion_1), ci_intensity)
            q2_ci_ratio = ratio_to_common(compo_peak.get_int_of_ion(qual_ion_2), ci_intensity)

            # NOTE(review): the 1.5 added to each window delta before the
            # conversion to minutes is kept as-is; its rationale is not
            # documented in this block.
            out_strings.append(
                peak_UID + ',' + str(common_ion) + ','
                + str(qual_ion_1)
                + (",%.1f" % (q1_ci_ratio * 100))
                + ',' + str(qual_ion_2)
                + (",%.1f" % (q2_ci_ratio * 100))
                + (",%.2f" % ((l_window_delta + 1.5) / 60))
                + (",%.2f" % ((r_window_delta + 1.5) / 60))
            )

        for row in out_strings:
            fp.write(row + "\n")
def write_mass_hunter_csv(
        alignment: Alignment,
        file_name: PathLike,
        top_ion_list: List[int],
        ):  # , peak_list_name):
    """
    Creates a csv file with UID, common and qualifying ions and their ratios for mass hunter interpretation.

    One row is written for every alignment position for which
    ``composite_peak`` returns a peak. Each row holds the composite peak UID,
    the common ion, the two qualifying ions with their intensity relative to
    the common ion (as a percentage), and the left / right window deltas,
    i.e. the distance from the composite retention time to the earliest /
    latest aligned peak.

    :param alignment: alignment object to write to file
    :param file_name: name of the output file.
    :param top_ion_list: a list of the common ions for each peak in the
        averaged peak list for the alignment.

    :raises TypeError: if ``file_name`` is not a string or PathLike object.
    :raises ValueError: if ``top_ion_list`` is :py:obj:`None`.
    """  # noqa: D400

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    # Validate before touching the filesystem, so an invalid call neither
    # creates an empty file nor leaves a file handle open.
    if top_ion_list is None:
        raise ValueError("List of common ions must be supplied")

    file_name = prepare_filepath(file_name)

    out_strings = []

    # ``peakpos`` holds one peak list per experiment; transposing it yields,
    # for each alignment position, the peaks every experiment placed there.
    # ``enumerate`` keeps ``index`` in step with ``top_ion_list`` even when a
    # position is skipped below.
    for index, aligned in enumerate(zip(*alignment.peakpos)):
        peaks = [peak for peak in aligned if peak is not None]

        compo_peak = composite_peak(peaks)
        if compo_peak is None:
            continue

        peak_UID = compo_peak.UID

        # Earliest and latest retention time among the aligned peaks, taken
        # directly from the peaks so that retention times above 5400 are
        # handled as well. The 0.0 fallbacks mirror the previous start values.
        retention_times = [peak.rt for peak in peaks]
        rt_min = min(retention_times, default=0.0)
        rt_max = max(retention_times, default=0.0)

        # time from the leftmost peak to the average, and from the average
        # to the rightmost peak
        l_window_delta = compo_peak.rt - rt_min
        r_window_delta = rt_max - compo_peak.rt

        common_ion = top_ion_list[index]

        # The first two '-' separated fields of the UID are the qualifying ions.
        uid_fields = peak_UID.split('-')
        qual_ion_1 = int(uid_fields[0])
        qual_ion_2 = int(uid_fields[1])

        # A qualifying ion must differ from the common ion; fall back to the
        # third highest m/z of the composite peak if it does not.
        if qual_ion_1 == common_ion:
            qual_ion_1 = compo_peak.get_third_highest_mz()
        elif qual_ion_2 == common_ion:
            qual_ion_2 = compo_peak.get_third_highest_mz()

        ci_intensity = compo_peak.get_int_of_ion(common_ion)
        q1_intensity = compo_peak.get_int_of_ion(qual_ion_1)
        q2_intensity = compo_peak.get_int_of_ion(qual_ion_2)

        q1_ci_ratio = _intensity_ratio(q1_intensity, ci_intensity)
        q2_ci_ratio = _intensity_ratio(q2_intensity, ci_intensity)

        out_strings.append(
            ','.join([
                peak_UID,
                f"{common_ion}",
                f"{qual_ion_1}",
                f"{q1_ci_ratio * 100:.1f}",
                f"{qual_ion_2}",
                f"{q2_ci_ratio * 100:.1f}",
                # 1.5 is added to each delta before dividing by 60
                # (presumably seconds of padding, reported in minutes -- TODO confirm)
                f"{(l_window_delta + 1.5) / 60:.2f}",
                f"{(r_window_delta + 1.5) / 60:.2f}",
            ])
        )

    # The context manager closes the file even if a write fails.
    with file_name.open('w', encoding="UTF-8") as fp:
        # write headers
        fp.write(
            '"UID","Common Ion","Qual Ion 1","ratio QI1/CI","Qual Ion 2",'
            '"ratio QI2/CI","l window delta","r window delta"\n'
        )
        fp.writelines(f"{row}\n" for row in out_strings)


def _intensity_ratio(ion_intensity, common_ion_intensity) -> float:
    """
    Return ``ion_intensity / common_ion_intensity`` for the mass hunter report.

    :param ion_intensity: intensity of the qualifying ion, or :py:obj:`None`.
    :param common_ion_intensity: intensity of the common ion, or :py:obj:`None`.

    :return: the ratio; ``0.0`` if either intensity is unavailable, ``0.01`` if
        the common ion intensity is zero.
    """

    try:
        return float(ion_intensity) / float(common_ion_intensity)
    except TypeError:
        # no intensity available for that ion
        return 0.0
    except ZeroDivisionError:
        # shouldn't happen but does!!
        return 0.01
def write_transposed_output(
        alignment: Alignment,
        file_name: PathLike,
        minutes: bool = True,
        ):
    """
    Writes the alignment to an excel file with one column per aligned peak and one row per experiment.

    Two sheets are created: "Aligned RT" holds the retention time of every
    aligned peak, "Aligned Area" the peak areas. Row 1 of each sheet holds
    the composite peak UID and row 2 its average retention time. Missing
    peaks are written as ``"NA"`` and peaks flagged ``is_outlier`` are
    highlighted in both sheets.

    :param alignment: :class:`pyms.DPA.Alignment.Alignment` object to write to file
    :param file_name: The name of the file
    :param minutes: Whether to save retention times in minutes.
        If :py:obj:`False`, retention times (including the ``RTavg`` row)
        are saved as stored on the peaks.

    :raises TypeError: if ``file_name`` is not a string or PathLike object.
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name)

    wb = Workbook()
    ws1 = wb.create_sheet(title="Aligned RT")
    ws2 = wb.create_sheet(title="Aligned Area")

    for sheet in (ws1, ws2):
        sheet["A1"] = "Peak"
        sheet["A2"] = "RTavg"

    style_outlier = PatternFill(fill_type="solid", fgColor="FFAE19", bgColor="FFAE19")

    # write column with sample IDs, below the two header rows
    for sample_row, item in enumerate(alignment.expr_code, start=3):
        ws1.cell(column=1, row=sample_row, value=f"{item}")
        ws2.cell(column=1, row=sample_row, value=f"{item}")

    # for each alignment position write alignment's peak and area
    for peak_idx in range(len(alignment.peakpos[0])):  # loop through peak lists
        cell_col = 2 + peak_idx

        # tuples of the form (peak, col, row), but only for non-NA peaks
        placed_peaks = []

        for align_idx, peak_list in enumerate(alignment.peakpos):  # loops through samples
            peak = peak_list[peak_idx]
            cell_row = 3 + align_idx

            if peak is None:
                rt_cell = ws1.cell(column=cell_col, row=cell_row, value="NA")
                ws2.cell(column=cell_col, row=cell_row, value="NA")
                rt_cell.comment = Comment("Area: NA", "dave")
                continue

            rt = peak.rt / 60.0 if minutes else peak.rt
            area = peak.area

            # these are the col,row coords of the peak in the output matrix
            placed_peaks.append((peak, cell_col, cell_row))

            # write the RT and the area into their cells
            rt_cell = ws1.cell(column=cell_col, row=cell_row, value=round(rt, 3))
            ws2.cell(column=cell_col, row=cell_row, value=round(area, 3))  # type: ignore

            # Mini mass spec for this peak, with the ion intensities divided
            # by 1000 to shorten them. Built as a new list so that whatever
            # mapping ``peak.ion_areas`` returns is left untouched.
            sorted_ia = sorted(
                ((mass, int(intensity / 1000)) for mass, intensity in peak.ion_areas.items()),
                key=operator.itemgetter(1),
                reverse=True,
            )

            # write the peak area and mass spec into the comment for the cell
            rt_cell.comment = Comment(f"Area: {area:.0f} | MassSpec: {sorted_ia}", "dave")

        # NOTE(review): per the original comment this call also marks outlier
        # peaks with a bool ``is_outlier`` -- confirm against composite_peak.
        compo_peak = composite_peak([entry[0] for entry in placed_peaks])

        if compo_peak is not None:
            uid_value = f'"{compo_peak.UID}"'
            # The average RT honours ``minutes`` just like the per-sample cells.
            avg_rt = compo_peak.rt / 60 if minutes else compo_peak.rt
            rt_value = f"{float(avg_rt):.3f}"

            for sheet in (ws1, ws2):
                sheet.cell(column=cell_col, row=1, value=uid_value)
                sheet.cell(column=cell_col, row=2, value=rt_value)

        # highlight outlier cells in the current peak list
        for peak, col, row in placed_peaks:
            if peak.is_outlier:
                ws1.cell(column=col, row=row).fill = style_outlier
                ws2.cell(column=col, row=row).fill = style_outlier

    wb.save(file_name)
def write_excel(
        alignment: Alignment,
        file_name: PathLike,
        minutes: bool = True,
        ):
    """
    Writes the alignment to an excel file, with colouring showing possible mis-alignments.

    The sheet "Aligned RT" has one row per aligned peak and one column per
    experiment. Column A holds the composite peak UID and column B its
    average retention time. Each retention time cell carries a comment with
    the peak area and a shortened mass spectrum; missing peaks are written
    as ``"NA"``.

    :param alignment: :class:`pyms.DPA.Alignment.Alignment` object to write to file.
    :param file_name: The name for the retention time alignment file.
    :param minutes: Whether to save retention times in minutes.
        If :py:obj:`False`, retention times (including the ``RTavg`` column)
        are saved as stored on the peaks.

    :raises TypeError: if ``file_name`` is not a string or PathLike object.

    :author: David Kainer
    """

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name)

    wb = Workbook()
    ws = wb.active
    ws.title = "Aligned RT"

    # create header row
    ws["A1"] = "UID"
    ws["B1"] = "RTavg"
    for i, item in enumerate(alignment.expr_code):
        header_cell = ws.cell(row=1, column=i + 3, value=f"{item}")
        header_cell.comment = Comment("sample " + str(i), "dave")

    # for each alignment position write alignment's peak and area
    for peak_idx in range(len(alignment.peakpos[0])):  # loop through peak lists (rows)
        sheet_row = 2 + peak_idx
        new_peak_list = []

        for align_idx, peak_list in enumerate(alignment.peakpos):  # loops through samples (columns)
            peak = peak_list[peak_idx]
            sheet_col = 3 + align_idx

            if peak is None:
                na_cell = ws.cell(row=sheet_row, column=sheet_col, value="NA")
                na_cell.comment = Comment("Area: NA", "dave")
                continue

            rt = peak.rt / 60.0 if minutes else peak.rt
            area = peak.area
            new_peak_list.append(peak)

            # write the RT into the cell in the excel file
            rt_cell = ws.cell(row=sheet_row, column=sheet_col, value=round(rt, 3))

            # Mini mass spec for this peak, with the ion intensities divided
            # by 1000 to shorten them. Built as a new list so that whatever
            # mapping ``peak.ion_areas`` returns is left untouched.
            sorted_ia = sorted(
                ((mass, int(intensity / 1000)) for mass, intensity in peak.ion_areas.items()),
                key=operator.itemgetter(1),
                reverse=True,
            )

            # write the peak area and mass spec into the comment for the cell
            rt_cell.comment = Comment(f"Area: {area:.0f} | MassSpec: {sorted_ia}", "dave")

        compo_peak = composite_peak(new_peak_list)

        if compo_peak is not None:
            # The average RT honours ``minutes`` just like the per-sample cells.
            avg_rt = compo_peak.rt / 60 if minutes else compo_peak.rt
            ws.cell(row=sheet_row, column=1, value=f'"{compo_peak.UID}"')
            ws.cell(row=sheet_row, column=2, value=f"{float(avg_rt):.3f}")

    # colour the cells in each row based on their RT percentile for that row
    first_column = get_column_letter(3)
    for row_number, row in enumerate(ws.rows, start=1):
        last_column = get_column_letter(len(row))
        ws.conditional_formatting.add(
            f"{first_column}{row_number}:{last_column}{row_number}",
            ColorScaleRule(
                start_type="percentile",
                start_value=1,
                start_color="E5FFCC",
                mid_type="percentile",
                mid_value=50,
                mid_color="FFFFFF",
                end_type="percentile",
                end_value=99,
                end_color="FFE5CC"
            ),
        )

    wb.save(file_name)
def write_ion_areas_csv(self, ms_file_name: Union[str, pathlib.Path], minutes: bool = True):
    """
    Write Ion Areas to CSV File

    The file is ``|`` delimited. The header row holds ``"UID"``, ``"RTavg"``
    and one column per experiment code. Every following row describes one
    alignment position: the composite peak UID, its average retention time,
    and for each experiment the list of ``(mass, area)`` pairs of the aligned
    peak sorted by descending area, or ``NA`` if the experiment has no peak
    at that position.

    :param ms_file_name: The name of the file
    :type ms_file_name: str, PathLike
    :param minutes: Whether to divide the average retention time by 60.
    :type minutes: bool

    :raises TypeError: if ``ms_file_name`` is not a string or PathLike object.

    :author: David Kainer
    :author: Dominic Davis-Foster (pathlib support)
    """

    if not is_path(ms_file_name):
        raise TypeError("'ms_file_name' must be a string or a PathLike object")

    ms_file_name = prepare_filepath(ms_file_name)

    with ms_file_name.open("w") as fp1:

        # create and write the header
        header = ['"UID"', '"RTavg"']
        header.extend(f'"{item}"' for item in self.expr_code)
        fp1.write("|".join(header) + "\n")

        for peak_idx in range(len(self.peakpos[0])):

            # one entry per experiment; None marks a missing peak
            ias = []
            new_peak_list = []

            for peak_list in self.peakpos:
                peak = peak_list[peak_idx]

                if peak is None:
                    # Keep a placeholder so that every experiment stays in
                    # its own column and lines up with the header.
                    ias.append(None)
                    continue

                # Areas are floored and sorted by descending area. Built as a
                # new list so that whatever mapping ``peak.ion_areas`` returns
                # is left untouched.
                sorted_ia = sorted(
                    ((mass, math.floor(intensity)) for mass, intensity in peak.ion_areas.items()),
                    key=operator.itemgetter(1),
                    reverse=True,
                )
                ias.append(sorted_ia)
                new_peak_list.append(peak)

            compo_peak = composite_peak(new_peak_list)

            # write to ms file
            avg_rt = compo_peak.rt / 60 if minutes else compo_peak.rt
            fields = [f"{compo_peak.UID}", f"{avg_rt:.3f}"]
            fields.extend("NA" if ia is None else f"{ia}" for ia in ias)
            fp1.write("|".join(fields) + "\n")
def write_csv_dk(self, rt_file_name, area_file_name, minutes=True):
    """
    @summary: Writes the alignment to CSV files, but excluded outliers from
        the calculation of composite peak

    This function writes two files: one containing the alignment of peak
    retention times and the other containing the alignment of peak areas.
    Both files share the header ("UID", "RTavg", then one column per
    experiment code) and hold one row per alignment position; missing
    peaks are written as NA.

    @param rt_file_name: The name for the retention time alignment file
    @type rt_file_name: StringType
    @param area_file_name: The name for the areas alignment file
    @type area_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds.
        Applies to the per-experiment retention times; the RTavg column is
        always the composite retention time divided by 60.
    @type minutes: BooleanType

    @author: David Kainer
    """

    try:
        fp1 = open(rt_file_name, "w")
    except IOError:
        error("Cannot open output file for writing")

    try:
        fp2 = open(area_file_name, "w")
    except IOError:
        # do not leak the first file if the second cannot be opened
        fp1.close()
        error("Cannot open output file for writing")

    # both files are closed on exit, even if writing fails part way through
    with fp1, fp2:

        # create header
        header_fields = ['"UID"', '"RTavg"']
        header_fields.extend('"%s"' % item for item in self.expr_code)
        header = ",".join(header_fields) + "\n"

        # write headers
        fp1.write(header)
        fp2.write(header)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists (rows)

            rts = []
            areas = []
            new_peak_list = []

            for peak_list in self.peakpos:  # loops through samples (columns)
                peak = peak_list[peak_idx]

                if peak is None:
                    rts.append(numpy.nan)
                    areas.append(None)
                    continue

                rts.append(peak.get_rt() / 60.0 if minutes else peak.get_rt())
                areas.append(peak.get_area())
                new_peak_list.append(peak)

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()
            rt_avg_string = "%.3f" % float(compo_peak.get_rt() / 60)

            # write to retention times file
            rt_fields = [peak_UID_string, rt_avg_string]
            rt_fields.extend("NA" if numpy.isnan(rt) else "%.3f" % rt for rt in rts)
            fp1.write(",".join(rt_fields) + "\n")

            # write to peak areas file
            area_fields = [peak_UID_string, rt_avg_string]
            area_fields.extend("NA" if area is None else "%.0f" % area for area in areas)
            fp2.write(",".join(area_fields) + "\n")
def write_common_ion_csv(self, area_file_name, top_ion_list, minutes=True):
    """
    @summary: Writes the areas of the common ion of every aligned peak
        to a CSV file

    This function writes one file. The header holds "UID", "RTavg",
    "Quant Ion" and one column per experiment code. Every following row
    describes one alignment position: the composite peak UID, the mean
    retention time of the aligned peaks divided by 60, the common ion, and
    for each experiment the area of the common ion (NA if the experiment
    has no peak at that position or the area is not available).

    @param area_file_name: The name for the areas alignment file
    @type area_file_name: StringType
    @param top_ion_list: A list of the highest intensity common ion
        along the aligned peaks
    @type top_ion_list: ListType
    @param minutes: An optional indicator passed on to composite_peak.
        The RTavg column is always divided by 60.
    @type minutes: BooleanType

    @author: Woon Wai Keen
    @author: Andrew Isaac
    @author: Sean O'Callaghan
    @author: Vladimir Likic
    """

    # validate before opening, so a bad call leaves no open file handle
    if top_ion_list is None:
        error("List of common ions must be supplied")

    try:
        fp = open(area_file_name, "w")
    except IOError:
        error("Cannot open output file for writing")

    # the file is closed on exit, even if writing fails part way through
    with fp:

        # create and write the header
        header_fields = ['"UID","RTavg", "Quant Ion"']
        header_fields.extend('"%s"' % item for item in self.expr_code)
        fp.write(",".join(header_fields) + "\n")

        # The following lists are indexed by alignment position, such that:
        # areas = [ [align1_peak1, align2_peak1, .....,alignn_peak1]
        #           [align1_peak2, ................................]
        #           .............................................
        #           [align1_peakm,....................,alignn_peakm] ]
        areas = []
        new_peak_lists = []
        rtsums = []
        rtcounts = []

        for peak_list in self.peakpos:
            for index, peak in enumerate(peak_list):

                # grow the per-position lists the first time a position is seen
                if len(areas) <= index:
                    areas.append([])
                    new_peak_lists.append([])
                    rtsums.append(0)
                    rtcounts.append(0)

                if peak is None:
                    areas[index].append(None)
                    continue

                # get the area of the common ion for the peak
                areas[index].append(peak.get_ion_area(top_ion_list[index]))
                new_peak_lists[index].append(peak)

                # running totals for the average rt
                rtsums[index] += peak.get_rt()
                rtcounts[index] += 1

        # now write the rows:
        # peak unique id, peak average rt, common ion, then the areas
        for index, area_list in enumerate(areas):
            compo_peak = composite_peak(new_peak_lists[index], minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()

            rt_avg = rtsums[index] / rtcounts[index]

            fields = [
                peak_UID_string,
                "%.3f" % (rt_avg / 60),
                "%d" % top_ion_list[index],
            ]
            fields.extend("NA" if area is None else "%.4f" % area for area in area_list)
            fp.write(",".join(fields) + "\n")
def write_transposed_output(self, excel_file_name, minutes=True):
    """
    @summary: Writes the alignment to an excel file with one column per
        aligned peak and one row per experiment

    Two sheets are created: "Aligned RT" holds the retention time of every
    aligned peak, "Aligned Area" the peak areas. Row 1 of each sheet holds
    the composite peak UID and row 2 the composite retention time divided
    by 60. Missing peaks are written as NA and peaks flagged as outliers
    are highlighted in both sheets.

    @param excel_file_name: The name for the excel file
    @type excel_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds.
        Applies to the per-experiment retention times.
    @type minutes: BooleanType
    """

    wb = Workbook()
    ws1 = wb.create_sheet(title='Aligned RT')
    ws2 = wb.create_sheet(title='Aligned Area')

    for sheet in (ws1, ws2):
        sheet['A1'] = "Peak"
        sheet['A2'] = "RTavg"

    style_outlier = PatternFill(fill_type="solid", fgColor="FFAE19", bgColor="FFAE19")

    # write column with sample IDs, below the two header rows
    for i, item in enumerate(self.expr_code):
        ws1.cell(column=1, row=i + 3, value="%s" % item)
        ws2.cell(column=1, row=i + 3, value="%s" % item)

    # for each alignment position write alignment's peak and area
    for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists
        cell_col = 2 + peak_idx

        # tuples of the form (peak, col, row), but only for non-NA peaks
        placed_peaks = []

        for align_idx, peak_list in enumerate(self.peakpos):  # loops through samples
            peak = peak_list[peak_idx]
            cell_row = 3 + align_idx

            if peak is None:
                rt_cell = ws1.cell(column=cell_col, row=cell_row, value='NA')
                ws2.cell(column=cell_col, row=cell_row, value='NA')
                rt_cell.comment = Comment("Area: NA", 'dave')
                continue

            rt = peak.get_rt() / 60.0 if minutes else peak.get_rt()
            area = peak.get_area()

            # these are the col,row coords of the peak in the output matrix
            placed_peaks.append((peak, cell_col, cell_row))

            # write the RT and the area into their cells
            rt_cell = ws1.cell(column=cell_col, row=cell_row, value=round(rt, 3))
            ws2.cell(column=cell_col, row=cell_row, value=round(area, 3))

            # Mini mass spec for this peak, with the ion intensities divided
            # by 1000 to shorten them. ``items()`` works on Python 2 and 3
            # (``iteritems()`` does not exist on Python 3 dicts).
            sorted_ia = sorted(
                ((mass, int(intensity / 1000)) for mass, intensity in peak.get_ion_areas().items()),
                key=operator.itemgetter(1),
                reverse=True,
            )

            # write the peak area and mass spec into the comment for the cell
            rt_cell.comment = Comment("Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

        # NOTE(review): per the original comment this call also marks outlier
        # peaks with a bool ``isoutlier`` -- confirm against composite_peak.
        compo_peak = composite_peak([entry[0] for entry in placed_peaks], minutes)

        # JT: removed nested "" to make it easier to work with R
        peak_UID_string = "%s" % compo_peak.get_UID()
        rt_avg_string = "%.3f" % float(compo_peak.get_rt() / 60)

        for sheet in (ws1, ws2):
            sheet.cell(column=cell_col, row=1, value=peak_UID_string)
            sheet.cell(column=cell_col, row=2, value=rt_avg_string)

        # highlight outlier cells in the current peak list
        for peak, col, row in placed_peaks:
            if peak.isoutlier:
                ws1.cell(column=col, row=row).fill = style_outlier
                ws2.cell(column=col, row=row).fill = style_outlier

    wb.save(excel_file_name)
def write_excel(self, excel_file_name, minutes=True):
    """
    @summary: Writes the alignment to an excel file, with colouring
        showing possible mis-alignments

    The sheet "Aligned RT" has one row per aligned peak and one column per
    experiment. Column A holds the composite peak UID and column B the
    composite retention time divided by 60. Each retention time cell
    carries a comment with the peak area and a shortened mass spectrum;
    missing peaks are written as NA.

    @param excel_file_name: The name for the retention time alignment file
    @type excel_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds.
        Applies to the per-experiment retention times.
    @type minutes: BooleanType

    @author: David Kainer
    """

    wb = Workbook()
    ws = wb.active
    ws.title = "Aligned RT"

    # create header row
    ws['A1'] = "UID"
    ws['B1'] = "RTavg"
    for i, item in enumerate(self.expr_code):
        header_cell = ws.cell(row=1, column=i + 3, value="%s" % item)
        header_cell.comment = Comment('sample ' + str(i), 'dave')

    # for each alignment position write alignment's peak and area
    for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists (rows)
        sheet_row = 2 + peak_idx
        new_peak_list = []

        for align_idx, peak_list in enumerate(self.peakpos):  # loops through samples (columns)
            peak = peak_list[peak_idx]
            sheet_col = 3 + align_idx

            if peak is None:
                na_cell = ws.cell(row=sheet_row, column=sheet_col, value='NA')
                na_cell.comment = Comment("Area: NA", 'dave')
                continue

            rt = peak.get_rt() / 60.0 if minutes else peak.get_rt()
            area = peak.get_area()
            new_peak_list.append(peak)

            # write the RT into the cell in the excel file
            rt_cell = ws.cell(row=sheet_row, column=sheet_col, value=round(rt, 3))

            # Mini mass spec for this peak, with the ion intensities divided
            # by 1000 to shorten them. ``items()`` works on Python 2 and 3
            # (``iteritems()`` does not exist on Python 3 dicts).
            sorted_ia = sorted(
                ((mass, int(intensity / 1000)) for mass, intensity in peak.get_ion_areas().items()),
                key=operator.itemgetter(1),
                reverse=True,
            )

            # write the peak area and mass spec into the comment for the cell
            rt_cell.comment = Comment("Area: %.0f | MassSpec: %s" % (area, sorted_ia), 'dave')

        compo_peak = composite_peak(new_peak_list, minutes)
        peak_UID_string = '"%s"' % compo_peak.get_UID()

        ws.cell(row=sheet_row, column=1, value=peak_UID_string)
        ws.cell(row=sheet_row, column=2, value="%.3f" % float(compo_peak.get_rt() / 60))

    # colour the cells in each row based on their RT percentile for that row
    first_column = utils.get_column_letter(3)
    for row_number, row in enumerate(ws.rows, 1):
        last_column = utils.get_column_letter(len(row))
        cell_range = "%s%d:%s%d" % (first_column, row_number, last_column, row_number)
        ws.conditional_formatting.add(
            cell_range,
            ColorScaleRule(
                start_type='percentile',
                start_value=1,
                start_color='E5FFCC',
                mid_type='percentile',
                mid_value=50,
                mid_color='FFFFFF',
                end_type='percentile',
                end_value=99,
                end_color='FFE5CC',
            ),
        )

    wb.save(excel_file_name)
def write_csv_dk(self, rt_file_name, area_file_name, minutes=True):
    """
    @summary: Writes the alignment to CSV files, but excluded outliers from
        the calculation of composite peak

    This function writes two files: one containing the alignment of peak
    retention times and the other containing the alignment of peak areas.
    Both files share the header ("UID", "RTavg", then one column per
    experiment code) and hold one row per alignment position; missing
    peaks are written as NA.

    @param rt_file_name: The name for the retention time alignment file
    @type rt_file_name: StringType
    @param area_file_name: The name for the areas alignment file
    @type area_file_name: StringType
    @param minutes: An optional indicator whether to save retention times
        in minutes. If False, retention time will be saved in seconds.
        Applies to the per-experiment retention times; the RTavg column is
        always the composite retention time divided by 60.
    @type minutes: BooleanType

    @author: David Kainer
    """

    try:
        fp1 = open(rt_file_name, "w")
    except IOError:
        error("Cannot open output file for writing")

    try:
        fp2 = open(area_file_name, "w")
    except IOError:
        # do not leak the first file if the second cannot be opened
        fp1.close()
        error("Cannot open output file for writing")

    # both files are closed on exit, even if writing fails part way through
    with fp1, fp2:

        # create header
        header_fields = ['"UID"', '"RTavg"']
        header_fields.extend('"%s"' % item for item in self.expr_code)
        header = ",".join(header_fields) + "\n"

        # write headers
        fp1.write(header)
        fp2.write(header)

        # for each alignment position write alignment's peak and area
        for peak_idx in range(len(self.peakpos[0])):  # loop through peak lists (rows)

            rts = []
            areas = []
            new_peak_list = []

            for peak_list in self.peakpos:  # loops through samples (columns)
                peak = peak_list[peak_idx]

                if peak is None:
                    rts.append(numpy.nan)
                    areas.append(None)
                    continue

                rts.append(peak.get_rt() / 60.0 if minutes else peak.get_rt())
                areas.append(peak.get_area())
                new_peak_list.append(peak)

            compo_peak = composite_peak(new_peak_list, minutes)
            peak_UID_string = '"%s"' % compo_peak.get_UID()
            rt_avg_string = "%.3f" % float(compo_peak.get_rt() / 60)

            # write to retention times file
            rt_fields = [peak_UID_string, rt_avg_string]
            rt_fields.extend("NA" if numpy.isnan(rt) else "%.3f" % rt for rt in rts)
            fp1.write(",".join(rt_fields) + "\n")

            # write to peak areas file
            area_fields = [peak_UID_string, rt_avg_string]
            area_fields.extend("NA" if area is None else "%.0f" % area for area in areas)
            fp2.write(",".join(area_fields) + "\n")