def getCharges(self):
    """Return the (minimum, maximum) total charge of this combination.

    Every block except oxygen contributes its count times the block's
    minimum / maximum charge.  Oxygen (the block named ``'O'``) is treated
    separately: one oxygen per block tagged ``canOxydize`` is counted with
    zero charge, and only the remaining oxygens contribute their charge.

    Returns:
        Tuple ``(c_min, c_max)``.
    """
    c_min = 0
    c_max = 0
    num_oxydized = 0
    indx_O = None
    for count, block, i in zip(self.N, Chem.Blocks,
                               range(Chem.getNumBlocks())):
        # Oxygen is handled after the loop, once the number of
        # oxidizable blocks is known.
        if block.name == 'O':
            indx_O = i
            continue
        c_min += count * block.getMinCharge()
        c_max += count * block.getMaxCharge()
        if block.hasTag(Chem.BlockTag.canOxydize):
            num_oxydized += count

    if indx_O is not None:
        # There is oxygen: only the oxygens not consumed by oxidizable
        # blocks carry a charge (the others contribute 0).
        oxygen = Chem.Blocks[indx_O]
        rem = self.N[indx_O] - num_oxydized
        c_min += rem * oxygen.getMinCharge()
        # NOTE(review): the original multiplied by the undefined name `_`
        # here (NameError as soon as oxygen is present). `rem` mirrors the
        # c_min line above; confirm that `self.N[indx_O]` was not intended.
        c_max += rem * oxygen.getMaxCharge()
    return c_min, c_max
def solveCombination(total, verbose=False):
    """Expand all block-count combinations for the target ``total``.

    Starts from a single empty ``CombinationResult`` and, once per block
    level, replaces every partial result by the results its ``iterate``
    method produces.

    Args:
        total: Target value handed to ``CombinationResult``.
        verbose: When true, print progress for every level and forward the
            flag to ``CombinationResult.iterate``.

    Returns:
        List of ``CombinationResult`` objects after the last level.  If
        there are no block levels at all, the list holding the initial
        empty result is returned (the original raised UnboundLocalError).
    """
    num_levels = Chem.getNumBlocks()
    frontier = [CombinationResult(total)]
    for level in range(num_levels):
        if verbose:
            print('Solving level %d of %d' % (level, num_levels))
            print(' inputs=%s' % str(frontier))
        expanded = []
        for partial in frontier:
            expanded.extend(partial.iterate(verbose=verbose))
        frontier = expanded
    return frontier
def buildIsotopeTree(self, cutoff):
    """Build the isotope probability tree for this combination.

    One tree level is added per element returned by ``getElements``, using
    the probabilities and labels reported by that element, and ``cutoff``
    is applied to the tree after each added level.

    Args:
        cutoff: Value forwarded to ``TreeNode.cutoff``.

    Returns:
        The populated ``puzzlotope.probability.TreeNode``.
    """
    symbols = self.getElements()
    isotope_tree = puzzlotope.probability.TreeNode()
    for symbol in symbols:
        element = Chem.getElement(symbol)
        probabilities, names = element.getProbsAndLabels()
        isotope_tree.addLevel(probabilities, names)
        isotope_tree.cutoff(cutoff)
    return isotope_tree
def spawn(self, newN, verbose):
    """Create a child result with ``newN`` units of the next block level.

    The child copies this result's total, weight and counts, then appends
    ``newN`` and adds ``newN`` times the mass of the block at the current
    level (``len(self.N)``) to its weight.

    Args:
        newN: Count to append for the next block level.
        verbose: When true, print the parent and the spawned child.

    Returns:
        The new ``CombinationResult``.
    """
    child = CombinationResult(self.total, self.weight, self.N)
    # self.N is untouched by the constructor, so its length is still the
    # index of the level being filled.
    child.weight += Chem.getBlockMasses(len(self.N)) * newN
    child.N.append(newN)
    if verbose:
        print('spawning %s => %s' % (self, child))
    return child
def __init__(self, total, weight=None, N=None):
    """Initialise a (partial) combination result.

    Args:
        total: Target value this combination is solved for.
        weight: Weight of the combination.  When omitted or falsy
            (``None`` or ``0``) the weight is recomputed from ``N`` and the
            block masses.
        N: Per-block counts, one entry per level filled so far.  A shallow
            copy is stored, so the caller's list is never modified.
            Defaults to an empty list.
    """
    # `None` replaces the former mutable `[]` default argument.
    self.N = [] if N is None else copy.copy(N)
    self.total = total
    if weight:
        self.weight = copy.copy(weight)
    else:
        self.weight = 0.0
        for level, count in enumerate(self.N):
            self.weight += count * Chem.getBlockMasses(level)
def __init__(self, combination, tolerance, cutoff):
    """Build the list of (probability, mass) entries for ``combination``.

    The isotope tree of the combination is built with ``cutoff``; every
    leaf becomes one ``ProbMass`` whose mass is the sum of the isotope
    masses along the leaf's branch.  The list is then passed through the
    private normalisation step.

    Args:
        combination: Object providing ``buildIsotopeTree``.
        tolerance: Forwarded to the normalisation step.
        cutoff: Forwarded to ``buildIsotopeTree`` and the normalisation
            step.
    """
    def branch_mass(leaf):
        # Accumulate in branch order, starting from 0.0.
        accumulated = 0.0
        for isotope in leaf.getBranchString():
            accumulated += Chem.getIsotopeMass(isotope)
        return accumulated

    isotope_tree = combination.buildIsotopeTree(cutoff)
    self.comb = combination
    self.pms = []
    for leaf in isotope_tree.getLeafs():
        self.pms.append(ProbMass(leaf.prob, branch_mass(leaf)))
    self.__normalize(tolerance, cutoff)
def _createChildren(combN):
    """Return one split-off child per block that is present in ``combN``.

    A combination whose counts sum to exactly 1 has no children.  Otherwise
    ``CIDSpectrum._splitoff`` is called once for every block index with a
    non-zero count, in index order.

    Args:
        combN: Sequence of per-block counts.

    Returns:
        List of the values returned by ``CIDSpectrum._splitoff``.
    """
    if sum(combN) == 1:
        return []
    return [
        CIDSpectrum._splitoff(combN, idx)
        for idx in range(Chem.getNumBlocks())
        if combN[idx] != 0
    ]
def getCombString(self, space=True):
    """Format the combination as ``"<count> <symbol>"`` entries.

    Args:
        space: When true, counts are right-aligned to width 3 and blocks
            with a count of zero (or less) are replaced by blanks of the
            same width.  When false, such blocks are omitted and counts
            are not padded.

    Returns:
        The entries joined by single spaces.
    """
    parts = []
    for idx in range(Chem.getNumBlocks()):
        count = self.N[idx]
        if count > 0:
            template = '%3d %s' if space else '%d %s'
            parts.append(template % (count, Chem.Blocks[idx].toSymbol()))
        elif space:
            blank = ' ' * len(Chem.Blocks[idx].toSymbol())
            parts.append('%3s %s' % ('', blank))
    return ' '.join(parts)
def iterate(self, verbose):
    """Yield one spawned child per candidate count for the next level.

    Nothing is yielded when the combination is already complete.
    Otherwise the candidate counts run from 0 up to and including
    ``ceil((total - weight) / block_mass) + 1``.

    Args:
        verbose: When true, print progress; also forwarded to ``spawn``.

    Yields:
        The result of ``self.spawn(count, verbose)`` for each count.
    """
    if self.isComplete():
        return

    level = len(self.N)
    remaining = self.total - self.weight
    maxN = math.ceil(remaining / Chem.getBlockMasses(level)) + 1
    if verbose:
        print('Iterating level %d, N=%s, w=%f' %
              (level, str(self.N), self.weight))
    for count in range(maxN + 1):
        if verbose:
            print(' trying %d for level %d' % (count, level))
        yield self.spawn(count, verbose)
def isComplete(self):
    """Return True once a count exists for every block level."""
    filled_levels = len(self.N)
    return filled_levels >= Chem.getNumBlocks()
def getElements(self):
    """Return the sorted elements of all blocks in this combination.

    For every block with a positive count, the block's element list is
    repeated ``count`` times.

    Returns:
        A new sorted list of the collected elements.
    """
    collected = []
    for idx, count in enumerate(self.N):
        if count > 0:
            collected.extend(Chem.getBlock(idx).getElements() * count)
    collected.sort()
    return collected
_invalid_count = 0 _neglected = [] _neglected_reasons = [] for comb in combinations: valid, reasons = comb.isValid(tolerance) if valid: _valid.append(comb) elif reasons: _neglected.append(comb) _neglected_reasons.append(' AND '.join(reasons)) else: _invalid_count += 1 print(' %d invalid combinations' % _invalid_count, file=file_out) print(' %d neglected:' % len(_neglected), file=file_out) for _n, _reason in zip(_neglected, _neglected_reasons): print(' %s: %s' % (_n.getCombString(), _reason), file=file_out) print(' %d combinations remaining:' % len(_valid), file=file_out) for _c in _valid: print(' ' + _c.getCombString(), file=file_out) return _valid if __name__ == '__main__': Chem.update() __N = ([ 0, ] * Chem.getNumBlocks()) __N[-1] = 2 cr = CombinationResult(415, weight=415, N=__N) print(Spectrum(cr, 0.3, 1e-7))
def run(proj_name, spectrum_fname, weight_tolerance, prob_cutoff, xlims,
        cid_peaks, cid_tolerance, recompute=False, do_plots=False,
        num_threads=multiprocessing.cpu_count()):
    """Run the full analysis pipeline for one project.

    Steps: read the measured spectrum, solve and filter the block
    combinations for the main mass peak, build isotope and CID spectra for
    every remaining combination, print/write the per-spectrum analyses and
    finally rank the combinations by the sum of both metrics.

    Intermediate results are cached as pickle files under ``data/`` and
    reused unless ``recompute`` is true.

    Args:
        proj_name: Name used in the output file names under ``data/``.
        spectrum_fname: File passed to ``Measurement.parseFile``.
        weight_tolerance: Tolerance for filtering combinations and for
            building the isotope spectra.
        prob_cutoff: Probability cutoff for building the isotope spectra.
        xlims: Two-element sequence ``(xmin, xmax)``; the measurement is
            read with a margin of 0.5 on either side.
        cid_peaks: Forwarded to the CID spectrum builder and printer.
        cid_tolerance: Forwarded to the CID spectrum builder and printer.
        recompute: Ignore existing cache files and recompute everything.
        do_plots: Plot the measured spectrum against all computed spectra.
        num_threads: Number of workers handed to the spectrum builders.
    """
    Chem.update()
    sep = '-'*15
    print(sep)
    print('\n Calculating project \'%s\'\n' % proj_name)
    print(sep)

    # All outputs go to data/; create it up front so a missing directory
    # does not abort the run after the expensive computations.
    os.makedirs('data', exist_ok=True)

    ### Step 0: Read measured Spectrum
    x, y = Measurement.parseFile(spectrum_fname,
                                 xmin=xlims[0]-0.5, xmax=xlims[1]+0.5)
    sigma, diracs = gmm.estimateSigma(x, y)
    masses_meas = [p.mass for p in diracs]
    weights_meas = [p.prob for p in diracs]
    mass_main_peak = masses_meas[weights_meas.index(max(weights_meas))]
    print('Step 0: Read spectrum from "%s"' % spectrum_fname)
    print(' using %d mass peaks' % len(diracs))
    print(' main mass was detected at %f' % mass_main_peak)
    print(sep)

    # NOTE(review): the cache files below are read with pickle.load, which
    # can execute arbitrary code; only use data/ directories you trust.

    ### Step 1: Computing possible combinations
    print('Step 1: Computing main isotope combinations')
    s1_fname = 'data/%s_combinations.p' % proj_name
    s1_txtname = 'data/%s_combinations.txt' % proj_name
    print(' storing results in %s' % s1_fname)
    print(' a detailed list of combinations is in %s' % s1_txtname)
    print(' target mass is %f' % mass_main_peak)
    if recompute or not os.path.isfile(s1_fname):
        allCombinations = puzzlotope.Solver.solveCombination(mass_main_peak)
        with open(s1_txtname, 'w') as txtf:
            filteredCombinations = puzzlotope.Solver.CombinationResult.filter(
                allCombinations, weight_tolerance, file_out=txtf)
        with open(s1_fname, 'wb') as _f:
            pickle.dump(filteredCombinations, _f)
    else:
        with open(s1_fname, 'rb') as _f:
            filteredCombinations = pickle.load(_f)
        print(' loaded %d combinations' % len(filteredCombinations))
    print(' Using %d combinations:' % len(filteredCombinations))
    for comb in filteredCombinations:
        print(' ' + comb.getCombString())
    print(sep)

    ### Step 2: Computing spectra for found combinations
    print('Step 2: Computing spectrum for each combination')
    s2_fname = 'data/%s_spectra.p' % proj_name
    print(' storing results in %s' % s2_fname)
    print(' number of combinations is %d' % len(filteredCombinations))
    if recompute or not os.path.isfile(s2_fname):
        start = timer()
        spectra = puzzlotope.Solver.buildSpectrums(
            filteredCombinations, weight_tolerance, prob_cutoff, num_threads,
            verbose=True, prefix=' ')
        end = timer()
        ellapsed = (end-start)
        print(' Time for spectrum computation: %f s' % ellapsed)
        print(' thats %f spectra per minute'
              % (len(filteredCombinations)/ellapsed*60.0))
        with open(s2_fname, 'wb') as _f:
            pickle.dump(spectra, _f)
    else:
        with open(s2_fname, 'rb') as _f:
            spectra = pickle.load(_f)
        print(' loaded %d spectra' % len(spectra))
    print(sep)

    ### Step 3: Build CID Spectra
    print('Step 3: Computing CID Spectra')
    s3_fname = 'data/%s_cidspectra.p' % proj_name
    print(' storing results in %s' % s3_fname)
    if recompute or not os.path.isfile(s3_fname):
        start = timer()
        cidspectra = puzzlotope.cidspectrum.buildSpectrums(
            filteredCombinations, cid_peaks, cid_tolerance, num_threads,
            verbose=True, prefix=' ')
        end = timer()
        ellapsed = (end-start)
        print(' Time for CID spectrum computation: %f s' % ellapsed)
        print(' thats %f spectra per minute'
              % (len(filteredCombinations)/ellapsed*60.0))
        with open(s3_fname, 'wb') as _f:
            pickle.dump(cidspectra, _f)
    else:
        with open(s3_fname, 'rb') as _f:
            cidspectra = pickle.load(_f)
        # Report the CID spectra that were just loaded (the original
        # printed the number of isotope spectra here).
        print(' loaded %d spectra' % len(cidspectra))
    print(sep)

    ### Step 4: Analyse isotope spectra
    print('Step 4: Analyse isotope spectra')
    s4_txtname = 'data/%s_spectra.txt' % proj_name
    s4_csvname = 'data/%s_spectra.csv' % proj_name
    with open(s4_txtname, 'w') as txtf, open(s4_csvname, 'w') as csvf:
        puzzlotope.Solver.Spectrum.printSpectra(
            spectra, xlims[0], xlims[1], diracs,
            prefix=' ', f_txt=txtf, f_csv=csvf)
    print(sep)

    ### Step 5: Analyse CID spectra
    print('Step 5: Analyse CID spectra')
    s5_txtname = 'data/%s_cidspectra.txt' % proj_name
    s5_csvname = 'data/%s_cidspectra.csv' % proj_name
    with open(s5_txtname, 'w') as txtf, open(s5_csvname, 'w') as csvf:
        puzzlotope.cidspectrum.printSpectra(
            cidspectra, cid_peaks, cid_tolerance,
            prefix=' ', f_txt=txtf, f_csv=csvf)
    print(sep)

    ### Step 6: Combined metric
    print('Step 6: Combined metric')
    s6_csvname = 'data/%s_finalmetrics.csv' % proj_name
    # Each row is [combination string, combined, isotope metric, CID metric].
    finaldict = {_c.getCombString(): [_c.getCombString(), None, None, None]
                 for _c in filteredCombinations}
    for _s in spectra:
        finaldict[_s.comb.getCombString()][2] = _s.metrics[0]
    for _s in cidspectra:
        finaldict[_s.original_comb.getCombString()][3] = _s.metric
    for _row in finaldict.values():
        _row[1] = _row[2] + _row[3]
    finalvalues = sorted(finaldict.values(), key=lambda row: row[1])

    TOPN = min(10, len(finalvalues))
    print(' Top %d (of %d) combinations based on combined isotope and CID spectrum.'
          % (TOPN, len(finalvalues)))
    print(' full list is in:')
    print(' %s' % s6_csvname)
    print('')
    # With no remaining combination there is nothing to measure the column
    # width from; fall back to the width of the column title.
    if filteredCombinations:
        comblen = filteredCombinations[0].getStringLen()
    else:
        comblen = len('Combination')
    print(' %-*s | Metric |' % (comblen, ''))
    print(' %-*s | Combined | Isotope | CID |' % (comblen, 'Combination'))
    for _row in finalvalues[:TOPN]:
        print(' %s | %8.2f | %8.2f | %8.2f |' % tuple(_row))

    with open(s6_csvname, 'w') as f_csv:
        print('"Combination","Combined Metric","Source Metric","CID Metric"',
              file=f_csv)
        for _row in finalvalues:
            print('"%s","%f","%f","%f"' % tuple(_row), file=f_csv)
    print(sep)

    if do_plots:
        # The PDFs and labels used to be built only inside a disabled
        # (string-literal) block, so requesting plots raised NameError.
        # They are built here, following that disabled code.
        pdf_meas = y/sum(y)
        pdfs = [pdf_meas, ]
        labels = ['Measurement', ]
        for spectrum in spectra:
            _masses = [p.mass for p in spectrum.pms]
            ofst = gmm.estimateOffset(masses_meas, _masses, weight_tolerance)
            pdfs.append(gmm.buildPDF(x, spectrum.pms, -ofst, sigma))
            labels.append(spectrum.comb.getCombString(space=False))
        gmm.plotPDFs(x, pdfs, labels, 'Spectrum comparison')