for isotopelabel_id, data in allsumionratiodata.iteritems(): allfractionbgratios[isotopelabel_id] = np.median(data) # second normalization so that the bg-ratios all add to 1 for isotopelabel_id, data in allfractionbgratios.iteritems(): allfractionbgratios[isotopelabel_id] = data / sum(allfractionbgratios.values()) logger.log.debug(('allfractionbgratios are %s' % str(allfractionbgratios))) for corrects2iquantob in corrects2iquantoblist: # perform correction for each of the analyzed .hdf5 files. s2icorrecteddata = corrects2iquantob.performS2Icorrection(allfractionbgratios) corrects2iquantob.hdf5corrects2iquant.updates2ivalues(s2icorrecteddata) hdf5corrects2iquant.close() except ExHa.czException as czEx: ExHa.reformatException(czEx) ExHa.addContext(czEx, 'Error during corrects2iquant run') ExHa.exportError2File(czEx, cfg.parameters['runtime']['datadir'] / Path('errors.error')) if logger: logger.log.warning(ExHa.oneLineRepr(czEx)) else: print ExHa.multiLineRepr(czEx) except Exception as genEx: ExHa.reformatException(genEx) ExHa.addContext(genEx, 'Error during corrects2iquant run') ExHa.exportError2File(genEx, cfg.parameters['runtime']['datadir'] / 'errors.error') if logger: logger.log.warning(ExHa.oneLineRepr(genEx)) else: print ExHa.multiLineRepr(genEx)
def updateHDF5(self):
    """
    @brief controls the updating of the data to the hdf5 results file: for every
           analysed .hdf5 file it collates spectrum, peptide and quantification
           data and writes them to the shared hdf5results object, then finalises
           indexes, FDR data and summary statistics.
    @return finalMessage <string>: constructed from the protein data this is the RESULT stored in the DB
            NOTE(review): no return statement is visible in this method — finalMessage
            is built but the method falls off the end of the try block (returning None
            unless a return exists outside this view); confirm against the caller.
    """
    pep2unique = self.pep2unique
    # baseContext accumulates per-file prefixes; context tracks the current step
    # so the except handler can report where the failure happened.
    baseContext = 'updateHDF5: '
    context = 'updateHDF5'
    try:
        # find the peptide sequences that are being imported
        usedPeps = self.setsManager.findUsedPeptides()
        logger.log.info('there are %s usedPeps' % len(usedPeps))
        context = baseContext + 'Retrieving sample IDs'
        # one sample id per input hdf5 file (1-based)
        sample_ids = range(1, len(self.hdfFiles) + 1)
        # create proteinset and proteinhit data
        starting_protein_group_no = 1
        self.setsManager.setProteinGroupNo(starting_protein_group_no)
        logger.log.info('adding protein group data to HDF5')
        logger.log.debug(str(self.hdfFiles.keys()))
        # running counters shared across all input files
        spectrum_id = 0
        peptide_id = 0
        hdfFileList = self.hdfFiles.keys()
        hdfFileList.sort()
        for key in hdfFileList:
            baseContext += '%s: ' % key
            logger.log.log(
                logger.PROCESS,
                'Integrating Spectrum, Peptide & Quantification data from %s' % key)
            # collect fileData
            hdf = self.hdfFiles[key]
            hdfObj = hdf.hdfObject
            # set the current sample_id from the list of IDs extracted from the DB
            # NOTE(review): pop() takes from the END of the list, so files sorted
            # ascending get sample ids in descending order — confirm intended.
            current_sample_id = sample_ids.pop()
            hdf.acquired_spectra, hdf.mascot_matched_spectra, numIsotopes, runTime = hdfObj.getNumbers()
            # read the Mascot data
            context = baseContext + 'Reading Mascot data'
            tmp = hdfObj.readImporterData(usedPeps, hdf)
            peptides = tmp[0]
            queryDict = tmp[1]
            headerArray = tmp[2]
            quanArray = tmp[3]
            hdf.spectra_in_qc_proteins = len(peptides)
            logger.log.debug('getting spectrum_ids')
            context = baseContext + 'Retrieving spectrum IDs'
            acqTime, hdf.idAct, hdf.quanAct = hdfObj.getTimeAndActivation()
            # create blank lists to hold data for writing to hdf5 file
            spectrum_list = []
            peptide_list = []
            quant_list = []
            logger.log.info('collating spectrum, peptide & quant data')
            pBar = progBar.ProgressBar(widgets=progBar.name_widgets,
                                       maxval=len(queryDict),
                                       name='collate data').start()
            for idx, q in enumerate(queryDict):
                # loop round all the required spectra
                pBar.nextPrimary()
                context = baseContext + 'query %i: Setting spectrum data' % q
                # extract a spectrum_id from the list
                spectrum_id += 1
                query = queryDict[q]
                spec = int(query['spec_id'])
                context = baseContext + 'spectrum %i: Updating DB with spectrum data' % spec
                # add spectrum data to spectrum_list
                header = self.filterArrayEqual(headerArray, 'spec_id', spec)
                spectrum_list.append(
                    self.makeSpectrumDict(spectrum_id, current_sample_id, query,
                                          acqTime, header))
                # find the appropriate peptides
                pepList = peptides[q]
                logger.log.debug('there are %s in peplist %s' % (len(pepList), str(pepList)))
                quantFound = 0
                # this list will hold all peptides returned from makePeptideDictList and then filter
                # those non-rank1 equivalents based on the score of the rank 1 peptide
                tmplist = []
                for pep in pepList:
                    # find the sets that the peptide belongs to and add to the peptide_list
                    sets = self.setsManager.peptide2set[pep['peptide']]
                    context = baseContext + 'spectrum %i: Creating peptide data entries for hdf5' % spec
                    tmp, qf = self.makePeptideDictList(
                        spectrum_id, pep, query, sets, hdf, pep2unique)
                    tmplist.extend(tmp)
                    peptide_list += tmp
                    quantFound += qf
                # only keep rank1 equivalent peptides (based on score)
                tmplist.sort(key=lambda x: x['rank'])
                toprankscore = tmplist[0]['score']
                tmplist = [x for x in tmplist if x['score'] == toprankscore]
                # quantMethID / quantSource are module-level settings (defined
                # outside this view) controlling whether/which quant data is read
                if quantMethID and quantFound:
                    # extract quantification data for the spectrum
                    context = baseContext + 'spectrum %i: Creating quantitation data entries for DB' % spec
                    newquant, deltas = self.makeQuantDictLists(
                        spectrum_id, spec, tmplist, header, quanArray, hdf)
                    quant_list += newquant
                    if quantSource == 'ms2':
                        context = baseContext + 'spectrum %i: Adding reporter ion delta data' % spec
                        hdf.addReporterDeltas(deltas)
            pBar.finish()
            # calculate statistics
            context = baseContext + 'Calculating statistics'
            hdf.calcReporterStats()
            context = baseContext + 'Calculating delta m/z for fragment ions'
            context = baseContext + 'Updating sample table (%i)' % current_sample_id
            sample_data = hdf.getSampleDataDict(current_sample_id, key, runTime)
            hdf5results.writeSample(sample_data)
            self.importData.combineStatistics(hdf)
            # write data to HDF5
            context = baseContext + 'Updating spectrum table'
            logger.log.info('updating HDF5 with spectrum data')
            hdf5results.writeSpectrum(spectrum_list)
            if quantMethID:
                context = baseContext + 'Updating specquant table'
                logger.log.info('updating HDF5 with quant data')
                hdf5results.writeSpecQuant(quant_list)
            context = baseContext + 'Retrieving peptide IDs'
            logger.log.info('updating HDF5 with peptide data')
            # assign sequential, file-spanning peptide ids before writing
            for pepdata in peptide_list:
                pepdata['peptide_id'] = peptide_id
                peptide_id += 1
            context = baseContext + 'Updating peptide table'
            hdf5results.writePeptide(peptide_list)
        # all input files processed: finalise the results file
        hdf5results.createIndexes()
        logger.log.info('finalising HDF5 entries')
        hdf5results.writeFDRdata(self.importData.score2fdr, 'peptide')
        hdf5results.writeFDRdata(self.importData.proteinscore2fdr, 'protein')
        topScoringProteinInfo = self.setsManager.addPeptideSetDBdata(
            hdf5results, self.importData.proteinscore2fdr)
        runtimedata = self.importData.getSummaryStatisticsDict()
        hdf5results.writeStatistics(runtimedata)
        # build the summary message that is stored in the DB
        finalMessage = 'queries matched: %i / %s (%.1f%%) ' % (
            runtimedata['spectra_in_qc_proteins'],
            runtimedata['mascot_matched_spectra'],
            (runtimedata['spectra_in_qc_proteins'] /
             float(runtimedata['mascot_matched_spectra'])) * 100)
        finalMessage += 'spectra quantified: %i top hit %s (%s) ' % (
            runtimedata['quantified_spectra'], '', '')
        finalMessage += 'with total score %f and %i matched peptides (hook AND non hook)' % \
            (topScoringProteinInfo[0], topScoringProteinInfo[2])
        baseContext = 'updateHDF5: '
        context = baseContext + 'Finalising HDF5 entries'
    except Exception, genEx:
        # make sure that there aren't any permanent changes
        # annotate the exception with the step that failed, then re-raise
        ExHa.addContext(genEx, context)
        finalMessage = 'Error: %s' % ExHa.oneLineRepr(genEx)
        raise
ret = cfg.evaluateCommandLineArgs(sys.argv) try: cfg.scalePpmMda() dataDir = cfg.parameters['runtime']['datadir'] logParam = cfg.parameters['logging'] logPath = Path(dataDir.joinpath(logParam['logdir'])) if not logPath.exists(): logPath.mkdir(parents=True) logFile = logPath.joinpath(logParam['logfile']) logger = Logger(logFile, logParam['loglevel'], logParam['screenlevel'], False) logger.setMascotParserLogs() jobcontrol(cfg, logger) except ExHa.UsageError as useEx: ExHa.reformatException(useEx) print useEx.context except Exception as genEx: ExHa.reformatException(genEx) errorFile = Path(cfg.parameters['runtime']['hdf5file']).stem + '.error' ExHa.exportError2File( genEx, cfg.parameters['runtime']['datadir'].joinpath(errorFile)) if logs: logs.datlog.warning(ExHa.oneLineRepr(genEx)) else: print ExHa.multiLineRepr(genEx)
importer.importData.mascot_matched_spectra) logger.log.info('Total Spectra Matched = %6i' % importer.importData.spectra_in_qc_proteins) logger.log.info('Total Spectra w Quant = %6i' % importer.importData.quantified_spectra) logger.log.info('Total Spectra All Rep = %6i' % importer.importData.numSpectraAllReporters) times = sw.stop() logger.log.info(sw.format()) hdf5results.close() except ExHa.UsageError as useEx: ExHa.reformatException(useEx) logger.log.warning(ExHa.oneLineRepr(useEx)) except Exception as genEx: # error if logger: logger.log.warning(ExHa.oneLineRepr(genEx)) print ExHa.multiLineRepr(genEx) else: print ExHa.multiLineRepr(genEx) if cfg: ExHa.exportError2File(genEx, dataDir.joinpath(resultfile.stem + '.error')) else: ExHa.exportError2File(genEx, dataDir.joinpath('errors.error')) sys.exit(ExHa.oneLineRepr(genEx))