assert "ProteinName" in header assert "rt_calibrated" in header assert "charge" in header assert "product" in header assert "library_intensity" in header # group the csv lines by their group id tr_group = {} for line in lines: if line[header_d["group_id"]] in tr_group: tr_group[line[header_d["group_id"]]].append(line) else: tr_group[line[header_d["group_id"]]] = [line] # loop through all groups and create a sptxt spectrum for each group l = speclib_db_lib.Library(1) cnt = 0 for key in tr_group: group = tr_group[key] firstline = group[0] spectrum = speclib_db_lib.Spectra() spectrum.name = firstline[header_d["group_id"]].replace('.', '/') spectrum.LibID = 1 spectrum.MW = firstline[header_d["precursor"]] spectrum.precursorMZ = firstline[header_d["precursor"]] spectrum.status = "Normal" spectrum.full_name = firstline[header_d["group_id"]].replace('.', '/') spectrum.sequence = firstline[header_d["PeptideSequence"]] spectrum.number_peaks = len(group) ##### The comment
def main(argv):
    """Split every matching .sptxt library into one file per RT cluster.

    For each input file, the spectra are grouped by peptide sequence (the
    part of the spectrum name before '/'), the retention times of each group
    are clustered with ``clusterRT``, and every spectrum is written to the
    output file ``<input>_<cluster index + 1>.sptxt``.  The ``Protein=`` entry
    of each spectrum's Comment line is prefixed with ``Subgroup_<index>_``.

    Args:
        argv: Command-line arguments as handed to ``getopt.getopt``:
            ``-h/--help``, ``-d/--distance <float>``, ``-a/--algorithm <name>``
            followed by one or more glob patterns of .sptxt files.
            ``-i`` and ``-t`` are still accepted (with a value) but ignored.

    Exits with status 2 on an option parsing error, when an input file does
    not exist, or when a spectrum carries neither RT nor iRT.
    """
    distance = 1.0
    algorithm = False

    # Long options that take a value must end with "=", otherwise getopt
    # parses them as flags and hands back an empty argument.
    try:
        opts, args = getopt.getopt(
            argv, "hd:i:t:a:", ["help", "distance=", "algorithm="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-d", "--distance"):
            distance = float(arg)
        if opt in ("-a", "--algorithm"):
            algorithm = arg

    # getopt already returns the positional arguments; counting consumed
    # tokens by hand breaks for forms such as "-d2.5" or "--distance=2.5".
    sptxtfiles = []
    for pat in args:
        sptxtfiles.extend(glob.glob(pat))

    protein_re = re.compile(r'Protein=(.*?)\s')

    for sptxtfile in sptxtfiles:
        print("Reading : " , sptxtfile)
        assert sptxtfile.endswith('.sptxt')
        if not os.path.exists(sptxtfile):
            print("The file: %s does not exist!" % sptxtfile)
            sys.exit(2)

        spectrastlib = speclib_db_lib.Library(99)

        offset = spectrastlib.get_first_offset(sptxtfile)
        last_offset = -100

        # Collect, per peptide sequence, the retention time of every spectrum
        # keyed by the file offset the spectrum was read from:
        # { "SEQUEN[Pho]CE" : {offset1 : RT1, offset2 : RT2},
        #   "SEQUENCE"      : {offset3 : RT3, offset4 : RT4, ...}, ... }
        peptide_spectra = {}

        # Reading stops once the offset no longer advances by more than 10.
        while offset - last_offset > 10:
            last_offset = offset
            offset, spectrum = spectrastlib.read_sptxt_with_offset(sptxtfile, offset)

            sequence = spectrum.name.split('/')[0]

            if not spectrum.RetTime_detected and not spectrum.iRT_detected:
                print("No RT/iRT was detected for %s" % spectrum.name)
                sys.exit(2)
            if spectrum.RetTime_detected:
                rt = spectrum.RetTime
            # iRT takes precedence over RetTime when both are present.
            if spectrum.iRT_detected:
                rt = spectrum.iRT

            peptide_spectra.setdefault(sequence, {})[last_offset] = rt

        max_num_of_clusters = 0
        peptide_spectra_cl = {}  # { sequence : { offset : cluster index } }
        print("cluster spectra by iRTs...")
        for sequence, spectra in peptide_spectra.items():
            print(sequence)
            rt_clusters = clusterRT(list(spectra.values()), distance, algorithm = algorithm)
            max_num_of_clusters = max(max_num_of_clusters, len(rt_clusters))

            assignments = {}
            for spectrum_offset, rt in spectra.items():
                # The last cluster containing this rt wins; -1 means that no
                # cluster contained it.
                # NOTE(review): -1 later indexes the last output file.
                cl_index = -1
                for index, cluster in enumerate(rt_clusters):
                    members = cluster if isinstance(cluster, list) else [cluster]
                    if rt in members:
                        cl_index = index
                assignments[spectrum_offset] = cl_index
            peptide_spectra_cl[sequence] = assignments

        splitfiles = []
        try:
            for x in range(max_num_of_clusters):
                splitfiles.append(open(sptxtfile[:-6] + "_" + str(x + 1) + ".sptxt", 'w'))

            # Initialise every output file with the original header.
            print("%s files will be created." % max_num_of_clusters)
            original_header = spectrastlib.get_fileheader(sptxtfile)
            for out in splitfiles:
                for line in original_header:
                    out.write(line)

            for sequence, spectra in peptide_spectra_cl.items():
                for spectrum_offset, cl_index in spectra.items():
                    sp = spectrastlib.get_rawspectrum_with_offset(sptxtfile, spectrum_offset)
                    for line in sp:
                        # Add the subgroup prefix to the protein name.
                        if 'Comment:' in line[:8]:
                            if not protein_re.search(line):
                                # NOTE(review): this drops the Comment line and
                                # the rest of the spectrum from the output;
                                # kept as in the original, confirm intent.
                                break
                            split_idx = line.index('Protein=') + 8
                            line = (line[:split_idx] + "Subgroup_" + str(cl_index)
                                    + "_" + line[split_idx:])
                        splitfiles[cl_index].write(line)
        finally:
            # Close the outputs even if reading or writing failed midway.
            for out in splitfiles:
                out.close()
        print("done.")