def format(self, new_file_name=None):
    """Convert the original CSV file into a report written via mz.Report.

    Reads ``self.orig_file`` with the csv module, renames the header row
    through ``self.convert_headers`` and, for every data row, rewrites:

    * 'Spectrum Description' with ``self.convert_spectrum``
    * 'Experimental mz' with ``self.convert_mass`` (given the m/z and the
      'Charge' cell of the same row)
    * 'Peptide Sequence' and 'Variable Modifications' with the pair
      returned by ``self.convert_seq`` for the sequence cell

    All cells are stripped of surrounding whitespace first.

    new_file_name -- path of the report to write. When omitted (or empty),
                     the report is written in the directory of the original
                     file under the name "mz_" + <original file name>.
    """
    # 'with' guarantees the input file is closed even when a row fails to
    # convert part-way through.
    with open(self.orig_file) as fin:
        csv_reader = csv.reader(fin)

        # Comprehensions (rather than map) so that the results are real
        # lists: rows are modified in place by index below.
        headers = [header.strip() for header in next(csv_reader)]
        new_headers = self.convert_headers(headers)

        # Column positions in the converted header. None of these may be
        # called 'mz': that would shadow the mz module for the whole
        # function and break the mz.Report.writer call further down.
        spec_desc_idx = new_headers.index('Spectrum Description')
        mz_idx = new_headers.index('Experimental mz')
        charge_idx = new_headers.index('Charge')
        seq_idx = new_headers.index('Peptide Sequence')
        var_mods_idx = new_headers.index('Variable Modifications')

        new_data = []
        for prerow in csv_reader:
            row = [cell.strip() for cell in prerow]
            row[spec_desc_idx] = self.convert_spectrum(row[spec_desc_idx])
            row[mz_idx] = self.convert_mass(row[mz_idx], row[charge_idx])
            # convert_seq yields both the cleaned sequence and the
            # modification text for the same peptide.
            (the_seq, the_mods) = self.convert_seq(row[seq_idx])
            row[seq_idx] = the_seq
            row[var_mods_idx] = the_mods
            new_data.append(row)

    if not new_file_name:
        directory, base_name = os.path.split(self.orig_file)
        new_file_name = os.path.join(directory, "mz_" + base_name)

    report = mz.Report.writer(new_file_name, columns=new_headers)
    try:
        for data in new_data:
            report.write(data)
    finally:
        # Close the writer even if a row is rejected.
        report.close()
def format_XML(xTandem_file, save_file, rev_mods=None, db_regex=None):
    '''Write a report built from an xTandem output file (XML).

    The rows come from xTandemArray(xTandem_file, rev_mods, db_regex). Any
    file already present at save_file is deleted before the report is
    written. The report is created with the default columns plus 'Expect'.
    '''
    records = xTandemArray(xTandem_file, rev_mods, db_regex)

    # Remove a previous report at the target path before creating the writer.
    stale_report_exists = os.path.exists(save_file)
    if stale_report_exists:
        os.remove(save_file)

    writer = mz.Report.writer(save_file,
                              columns=['Expect'],
                              default_columns=True)
    for record in records:
        writer.write(record)
    writer.close()
def format(self, new_file_name=None):
    """Convert the original tab-delimited file into an mz.Report report.

    The first line of ``self.orig_file`` is the header. Output columns are
    a copy of ``multiplierz.mzReport.default_columns`` without
    'Peptide Rank' and 'Query', plus 'MS2 Time' and 'Protein Coverage',
    plus every input header that is not a key of ``self.reps`` (except
    'Unused', 'Contrib' and 'Sc', which are dropped).

    For each data line, values are copied to the output column named by
    ``self.reps`` (input header -> output column) or to the column of the
    same name. In addition:

    * 'Variable Modifications' is derived from 'Modifications' with
      ``self.convert_var_mod``
    * 'Missed Cleavages' is the number of occurrences of 'missed' in
      'Cleavages'
    * 'Spectrum Description' is built from 'Spectrum' and 'Time' with
      ``self.convert_spectrum``
    * 'Protein Matches' is the number of data lines sharing the row's
      accession (counted on the input 'Accessions' column and looked up by
      the row's 'Accession Number' -- presumably self.reps maps one to the
      other; verify against the class definition)

    Blank lines are ignored. A data line without an 'Accessions' value
    raises KeyError.

    new_file_name -- path of the report to write. When omitted (or empty),
                     the report is written in the directory of the original
                     file under the name "mz_" + <original file name>.
    """
    # 'with' guarantees the input file is closed even if a line fails to
    # convert part-way through.
    with open(self.orig_file) as fh:
        # Change header names
        header_line = fh.readline()
        headers = header_line.strip().split('\t')

        new_headers = multiplierz.mzReport.default_columns[:]
        new_headers.extend(('MS2 Time', 'Protein Coverage'))
        new_headers.remove('Peptide Rank')
        new_headers.remove('Query')
        new_headers.extend(h for h in headers if h not in self.reps)
        for unwanted in ('Unused', 'Contrib', 'Sc'):
            if unwanted in new_headers:
                new_headers.remove(unwanted)

        rows = []
        protein_matches = defaultdict(int)
        for line in fh:
            # Strip only the line terminator. Slicing off the last
            # character would eat real data on a final line that has no
            # trailing newline.
            line = line.rstrip('\r\n')
            if not line:
                # Blank (e.g. trailing) lines carry no record.
                continue

            new_data = dict.fromkeys(new_headers)
            data = dict(zip(headers, line.split('\t')))
            protein_matches[data['Accessions']] += 1

            for h, value in data.items():
                if h in self.reps and self.reps[h] in new_data:
                    new_data[self.reps[h]] = value
                elif h in new_data:
                    new_data[h] = value
                elif h not in ('Unused', 'Contrib', 'Sc'):
                    logger_message(10, 'Missing key: %s' % h)

            if 'Modifications' in data:
                new_data['Variable Modifications'] = self.convert_var_mod(data['Modifications'])
            if 'Cleavages' in data:
                new_data['Missed Cleavages'] = data['Cleavages'].count('missed')
            if 'Time' in data and 'Spectrum' in data:
                new_data['Spectrum Description'] = self.convert_spectrum(data['Spectrum'], data['Time'])

            rows.append(new_data)

    # The per-protein counts are only complete once every line was read.
    for row in rows:
        row['Protein Matches'] = protein_matches[row['Accession Number']]

    if not new_file_name:
        directory, base_name = os.path.split(self.orig_file)
        new_file_name = os.path.join(directory, "mz_" + base_name)

    report = mz.Report.writer(new_file_name, columns=new_headers)
    try:
        for row in rows:
            report.write(row)
    finally:
        # Close the writer even if a row is rejected.
        report.close()