def test_consumer(self):
    (ms1_peak_picking_args, msn_peak_picking_args,
     ms1_deconvolution_args, msn_deconvolution_args) = self.build_args()
    outdir = self.make_output_directory()
    outpath = os.path.join(outdir, "test-output.mzML")
    consumer = SampleConsumer(
        agp_glycomics_mzml,
        ms1_peak_picking_args=ms1_peak_picking_args,
        ms1_deconvolution_args=ms1_deconvolution_args,
        msn_peak_picking_args=msn_peak_picking_args,
        msn_deconvolution_args=msn_deconvolution_args,
        storage_path=outpath, sample_name=None,
        n_processes=5,
        extract_only_tandem_envelopes=False)
    consumer.start()
    reader = ProcessedMzMLDeserializer(outpath)
    scan = reader.get_scan_by_id("scanId=1601016")
    self.assertIsNotNone(
        scan.deconvoluted_peak_set.has_peak(958.66, use_mz=1))
    reader.close()
    self.cleanup(outdir)
def _make_scan_loader(self):
    if self.mzml_path is not None:
        if not os.path.exists(self.mzml_path):
            raise IOError("No such file {}".format(self.mzml_path))
        self.scan_loader = ProcessedMzMLDeserializer(self.mzml_path)
    else:
        self.mzml_path = self.analysis.parameters['sample_path']
        if not os.path.exists(self.mzml_path):
            raise IOError((
                "No such file {}. If {} was relocated, you may need to explicitly pass the"
                " corrected file path.").format(
                    self.mzml_path,
                    self.database_connection._original_connection))
        self.scan_loader = ProcessedMzMLDeserializer(self.mzml_path)
def add_sample(project_path, sample_path):
    from glycresoft_app.project.project import Project
    from ms_deisotope.output.mzml import ProcessedMzMLDeserializer
    project = Project(project_path)
    reader = ProcessedMzMLDeserializer(abspath(sample_path))
    record = project.sample_manager.make_record(reader)
    project.sample_manager.put(record)
    project.sample_manager.dump()
def is_resolvable(self):
    if not os.path.exists(self.path):
        return False
    reader = ProcessedMzMLDeserializer(self.path, use_index=False)
    sample_run = reader.sample_run
    if sample_run.uuid != self.uuid:
        return False
    return True
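# Usage sketch (hedged): `records` is a hypothetical iterable of sample-record
# objects exposing the is_resolvable() method above. This filters out records
# whose backing file is missing or whose stored UUID no longer matches the
# sample run on disk.
resolvable_records = [record for record in records if record.is_resolvable()]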
def test_consumer(self):
    (ms1_peak_picking_args, msn_peak_picking_args,
     ms1_deconvolution_args, msn_deconvolution_args) = self.build_args()
    outdir = self.make_output_directory()
    outpath = os.path.join(outdir, "test-output.mzML")
    consumer = SampleConsumer(
        agp_glycproteomics_mzml,
        ms1_peak_picking_args=ms1_peak_picking_args,
        ms1_deconvolution_args=ms1_deconvolution_args,
        msn_peak_picking_args=msn_peak_picking_args,
        msn_deconvolution_args=msn_deconvolution_args,
        storage_path=outpath, sample_name=None,
        n_processes=5,
        extract_only_tandem_envelopes=True,
        ms1_averaging=1)
    consumer.start()
    reader = ProcessedMzMLDeserializer(outpath)
    reference = ProcessedMzMLDeserializer(agp_glycproteomics_mzml_reference)
    for a_bunch, b_bunch in zip(reader, reference):
        assert a_bunch.precursor.id == b_bunch.precursor.id
        assert len(a_bunch.products) == len(b_bunch.products)
        for a_product, b_product in zip(a_bunch.products, b_bunch.products):
            assert (a_product.precursor_information.defaulted ==
                    b_product.precursor_information.defaulted)
            matched = np.isclose(
                a_product.precursor_information.neutral_mass,
                b_product.precursor_information.neutral_mass)
            message = [
                "%0.3f not close to %0.3f for %s of %s" % (
                    a_product.precursor_information.neutral_mass,
                    b_product.precursor_information.neutral_mass,
                    a_product.id,
                    a_product.precursor_information.precursor_scan_id)
            ]
            message.append(
                "Found precursor score %r, expected %r" % (
                    a_product.precursor_information.precursor.deconvoluted_peak_set.has_peak(
                        a_product.precursor_information.neutral_mass).score,
                    b_product.precursor_information.precursor.deconvoluted_peak_set.has_peak(
                        b_product.precursor_information.neutral_mass).score))
            assert matched, '\n'.join(message)
            assert len(a_product.deconvoluted_peak_set) == len(
                b_product.deconvoluted_peak_set)
    reader.close()
    reference.close()
    self.cleanup(outdir)
def oxonium_signature(ms_file, g_score_threshold=0.05):
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        click.secho("Building temporary index...", fg='yellow')
        index, intervals = quick_index.index(
            ms_deisotope.MSFileLoader(ms_file))
        reader.extended_index = index
        with open(reader._index_file_name, 'w') as handle:
            index.serialize(handle)
    from glycan_profiling.tandem.glycan.scoring.signature_ion_scoring import SignatureIonScorer
    from glycan_profiling.tandem.oxonium_ions import gscore_scanner

    refcomp = glypy.GlycanComposition.parse(
        "{Fuc:1; Hex:5; HexNAc:4; Neu5Ac:2}")
    for scan_id in reader.extended_index.msn_ids.keys():
        scan = reader.get_scan_by_id(scan_id)
        gscore = gscore_scanner(scan.deconvoluted_peak_set)
        if gscore >= g_score_threshold:
            signature_match = SignatureIonScorer.evaluate(scan, refcomp)
            click.echo("%s\t%f\t%r\t%f\t%f" % (
                scan_id, scan.precursor_information.neutral_mass,
                scan.precursor_information.charge, gscore,
                signature_match.score))
def make_peak_loader(self):
    peak_loader = ProcessedMzMLDeserializer(self.sample_path)
    if peak_loader.extended_index is None:
        if not peak_loader.has_index_file():
            self.log("Index file missing. Rebuilding.")
            peak_loader.build_extended_index()
        else:
            peak_loader.read_index_file()
    if peak_loader.extended_index is None or len(peak_loader.extended_index.msn_ids) < 1:
        raise ValueError("Sample Data Invalid: Could not validate MS/MS Index")
    return peak_loader
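# The "recover or rebuild the extended index" pattern in make_peak_loader
# recurs throughout these examples. A standalone sketch of the same logic,
# using only calls seen above (the name ensure_extended_index is hypothetical):
from ms_deisotope.output.mzml import ProcessedMzMLDeserializer


def ensure_extended_index(path):
    reader = ProcessedMzMLDeserializer(path)
    if reader.extended_index is None:
        if reader.has_index_file():
            # A serialized index sits beside the data file; load it.
            reader.read_index_file()
        else:
            # No index file on disk; scan the file and build one in memory.
            reader.build_extended_index()
    return reader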
def msfile_info(ms_file):
    reader = ProcessedMzMLDeserializer(ms_file)
    if not reader.has_index_file():
        index, intervals = quick_index.index(
            ms_deisotope.MSFileLoader(ms_file))
        reader.extended_index = index
        with open(reader._index_file_name, 'w') as handle:
            index.serialize(handle)
    click.echo("Name: %s" % (os.path.basename(ms_file), ))
    click.echo("MS1 Scans: %d" % (len(reader.extended_index.ms1_ids), ))
    click.echo("MSn Scans: %d" % (len(reader.extended_index.msn_ids), ))
    n_defaulted = 0
    n_orphan = 0
    charges = defaultdict(int)
    first_msn = float('inf')
    last_msn = 0
    for scan_info in reader.extended_index.msn_ids.values():
        n_defaulted += scan_info.get('defaulted', False)
        n_orphan += scan_info.get('orphan', False)
        charges[scan_info['charge']] += 1
        rt = scan_info['scan_time']
        if rt < first_msn:
            first_msn = rt
        if rt > last_msn:
            last_msn = rt
    click.echo("First MSn Scan: %0.2f Minutes" % (first_msn, ))
    click.echo("Last MSn Scan: %0.2f Minutes" % (last_msn, ))
    for charge, count in sorted(charges.items()):
        if not isinstance(charge, int):
            continue
        click.echo("Precursors with Charge State %d: %d" % (charge, count))
    click.echo("Defaulted MSn Scans: %d" % (n_defaulted, ))
    click.echo("Orphan MSn Scans: %d" % (n_orphan, ))
def make_record(cls, reader):
    if isinstance(reader, basestring):
        reader = ProcessedMzMLDeserializer(reader, use_index=True)
    sample = reader.sample_run
    if len(reader.extended_index.msn_ids) > 0:
        sample_type = "MS/MS Sample"
    else:
        sample_type = "MS Sample"
    record = SampleRunRecord(
        name=sample.name, uuid=sample.uuid,
        path=reader.source_file,
        completed=True, sample_type=sample_type)
    return record
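# Usage sketch, mirroring the add_sample example above: make_record accepts
# either an open reader or a path string. `project` is assumed to be an
# already-opened glycresoft_app Project, and the file name is a placeholder.
reader = ProcessedMzMLDeserializer("processed-sample.mzML", use_index=True)
record = project.sample_manager.make_record(reader)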
def analyze_glycopeptide_sequences(database_connection, sample_path, hypothesis_identifier,
                                   output_path, analysis_name, grouping_error_tolerance=1.5e-5,
                                   mass_error_tolerance=1e-5, msn_mass_error_tolerance=2e-5,
                                   psm_fdr_threshold=0.05, peak_shape_scoring_model=None,
                                   minimum_oxonium_threshold=0.05, workload_size=1000,
                                   use_peptide_mass_filter=True, mass_shifts=None,
                                   permute_decoy_glycan_fragments=False,
                                   include_rare_signature_ions=False,
                                   model_retention_time=False,
                                   search_strategy=GlycopeptideSearchStrategyEnum.classic,
                                   decoy_database_connection=None, decoy_hypothesis_id=None,
                                   tandem_scoring_model=None, channel=None, **kwargs):
    if peak_shape_scoring_model is None:
        peak_shape_scoring_model = GeneralScorer.clone()
        peak_shape_scoring_model.add_feature(get_feature("null_charge"))
    database_connection = DatabaseBoundOperation(database_connection)
    if decoy_database_connection:
        decoy_database_connection = DatabaseBoundOperation(decoy_database_connection)
    if not os.path.exists(sample_path):
        channel.send(Message("Could not locate sample %r" % sample_path, "error"))
        return
    reader = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = reader.sample_run
    try:
        hypothesis = get_by_name_or_id(
            database_connection, GlycopeptideHypothesis, hypothesis_identifier)
    except Exception:
        channel.send(Message("Could not locate hypothesis %r" % hypothesis_identifier, "error"))
        channel.abort("An error occurred during analysis.")
    if decoy_database_connection:
        try:
            decoy_hypothesis = get_by_name_or_id(
                decoy_database_connection, GlycopeptideHypothesis, decoy_hypothesis_id)
        except Exception:
            channel.send(Message("Could not locate hypothesis %r" % decoy_hypothesis_id, "error"))
            channel.abort("An error occurred during analysis.")
    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)
    analysis_name = validate_analysis_name(None, database_connection.session, analysis_name)
    try:
        mass_shift_out = []
        for mass_shift, multiplicity in mass_shifts:
            mass_shift_out.append(validate_mass_shift(mass_shift, multiplicity))
        expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
            dict(mass_shift_out), crossproduct=False)
        mass_shifts = expanded
    except Abort:
        channel.send(Message.traceback())
        return
    try:
        if search_strategy == GlycopeptideSearchStrategyEnum.classic:
            analyzer = MzMLGlycopeptideLCMSMSAnalyzer(
                database_connection._original_connection,
                hypothesis.id, sample_path,
                output_path=output_path,
                analysis_name=analysis_name,
                grouping_error_tolerance=grouping_error_tolerance,
                mass_error_tolerance=mass_error_tolerance,
                msn_mass_error_tolerance=msn_mass_error_tolerance,
                psm_fdr_threshold=psm_fdr_threshold,
                peak_shape_scoring_model=peak_shape_scoring_model,
                oxonium_threshold=minimum_oxonium_threshold,
                spectrum_batch_size=workload_size,
                use_peptide_mass_filter=use_peptide_mass_filter,
                mass_shifts=mass_shifts,
                permute_decoy_glycans=permute_decoy_glycan_fragments,
                rare_signatures=include_rare_signature_ions,
                model_retention_time=model_retention_time,
                tandem_scoring_model=tandem_scoring_model)
        elif search_strategy == GlycopeptideSearchStrategyEnum.classic_comparison:
            analyzer = MzMLComparisonGlycopeptideLCMSMSAnalyzer(
                database_connection._original_connection,
                decoy_database_connection._original_connection,
                hypothesis.id, sample_path,
                output_path=output_path,
                analysis_name=analysis_name,
                grouping_error_tolerance=grouping_error_tolerance,
                mass_error_tolerance=mass_error_tolerance,
                msn_mass_error_tolerance=msn_mass_error_tolerance,
                psm_fdr_threshold=psm_fdr_threshold,
                peak_shape_scoring_model=peak_shape_scoring_model,
                oxonium_threshold=minimum_oxonium_threshold,
                spectrum_batch_size=workload_size,
                use_peptide_mass_filter=use_peptide_mass_filter,
                mass_shifts=mass_shifts,
                permute_decoy_glycans=permute_decoy_glycan_fragments,
                rare_signatures=include_rare_signature_ions,
                model_retention_time=model_retention_time,
                tandem_scoring_model=tandem_scoring_model)
        elif search_strategy == GlycopeptideSearchStrategyEnum.multipart:
            analyzer = MultipartGlycopeptideLCMSMSAnalyzer(
                database_connection._original_connection,
                decoy_database_connection._original_connection,
                hypothesis.id, decoy_hypothesis.id, sample_path,
                output_path=output_path,
                analysis_name=analysis_name,
                grouping_error_tolerance=grouping_error_tolerance,
                mass_error_tolerance=mass_error_tolerance,
                msn_mass_error_tolerance=msn_mass_error_tolerance,
                psm_fdr_threshold=psm_fdr_threshold,
                peak_shape_scoring_model=peak_shape_scoring_model,
                spectrum_batch_size=workload_size,
                mass_shifts=mass_shifts,
                rare_signatures=include_rare_signature_ions,
                model_retention_time=model_retention_time,
                tandem_scoring_model=tandem_scoring_model)
        _ = analyzer.start()
        analysis = analyzer.analysis
        if analysis is not None:
            record = project_analysis.AnalysisRecord(
                name=analysis.name, id=analysis.id, uuid=analysis.uuid,
                path=output_path, analysis_type=analysis.analysis_type,
                hypothesis_uuid=analysis.hypothesis.uuid,
                hypothesis_name=analysis.hypothesis.name,
                sample_name=analysis.parameters['sample_name'],
                user_id=channel.user.id)
            channel.send(Message(record.to_json(), 'new-analysis'))
        else:
            channel.send(Message(
                "No glycopeptides were identified for \"%s\"" % (analysis_name, )))
    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during analysis.")
class SampleView(SimpleViewBase):
    def __init__(self, record, minimum_mass=None, abundance_threshold=None):
        SimpleViewBase.__init__(self)
        self.record = record
        self.reader = ProcessedMzMLDeserializer(record.path)
        self.scan_levels = {
            "1": len(self.reader.extended_index.ms1_ids),
            "N": len(self.reader.extended_index.msn_ids)
        }
        self.minimum_mass = minimum_mass
        self.abundance_threshold = abundance_threshold
        self._chromatograms = None
        self.chromatograms = None
        self.total_ion_chromatogram = None
        self.oxonium_ion_chromatogram = None
        self.chromatogram_artist = None
        self.oxonium_ion_artist = None

    @property
    def chromatograms(self):
        if self._chromatograms is None:
            self.build_chromatograms()
        return self._chromatograms

    @chromatograms.setter
    def chromatograms(self, value):
        self._chromatograms = value

    def _estimate_threshold(self):
        intensity_accumulator = []
        mz_accumulator = []
        charge_accumulator = []
        if not self.reader.extended_index.ms1_ids:
            self.mass_array = np.array([])
            self.charge_array = np.array([])
            self.intensity_array = np.array([])
            self.abundance_threshold = 0
            self.minimum_mass = 0
            return
        for scan_id in self.reader.extended_index.ms1_ids:
            header = self.reader.get_scan_header_by_id(scan_id)
            intensity_accumulator.extend(header.arrays.intensity)
            mz_accumulator.extend(header.arrays.mz)
            try:
                charge_accumulator.extend(header['charge array'])
            except Exception:
                charge_accumulator.extend(
                    np.ones_like(header.arrays.mz) * header.polarity)
        mass_array = ms_deisotope.neutral_mass(
            np.array(mz_accumulator), np.array(charge_accumulator))
        self.mass_array = mass_array
        self.charge_array = np.array(charge_accumulator, dtype=int)
        self.intensity_array = np.array(intensity_accumulator)
        if self.abundance_threshold is None and intensity_accumulator:
            self.abundance_threshold = np.percentile(intensity_accumulator, 90)
        if self.minimum_mass is None and len(mass_array):
            counts, bins = np.histogram(self.mass_array)
            self.minimum_mass = np.average(bins[:-1], weights=counts)

    def build_oxonium_ion_chromatogram(self):
        window_width = 0.01
        ox_time = []
        ox_current = []
        for scan_id in self.reader.extended_index.msn_ids:
            try:
                scan = self.reader.get_scan_header_by_id(scan_id)
            except AttributeError:
                print("Unable to resolve scan id %r" % scan_id)
                break
            mz, intens = scan.arrays
            total = 0
            for ion in standard_oxonium_ions:
                coords = sweep(mz, ion.mass() + 1.007, window_width)
                total += intens[coords].sum()
            ox_time.append(scan.scan_time)
            ox_current.append(total)
        self.oxonium_ion_chromatogram = list(
            map(np.array, (ox_time, ox_current)))

    def draw_chromatograms(self):
        if self.chromatograms is None:
            self.build_chromatograms()
        ax = figax()
        chromatograms = list(self.chromatograms)
        if len(chromatograms):
            chromatograms.append(self.total_ion_chromatogram)
            chromatograms = [chrom for chrom in chromatograms if len(chrom) > 0]
            a = SmoothingChromatogramArtist(
                chromatograms, ax=ax, colorizer=lambda *a, **k: 'lightblue')
            a.draw(label_function=lambda *a, **kw: "")
            rt, intens = self.total_ion_chromatogram.as_arrays()
            a.draw_generic_chromatogram("TIC", rt, intens, 'lightblue')
            a.ax.set_ylim(0, max(intens) * 1.1)
            chromatogram_artist = a
            fig = chromatogram_artist.ax.get_figure()
            fig.set_figwidth(10)
            fig.set_figheight(5)
            # if self.reader.extended_index.msn_ids:
            #     oxonium_axis = ax.twinx()
            #     stub = SimpleChromatogram(
            #         self.total_ion_chromatogram.time_converter)
            #     for key in self.total_ion_chromatogram:
            #         stub[key] = 0
            #     oxonium_ion_artist = SmoothingChromatogramArtist(
            #         [stub], ax=oxonium_axis).draw(
            #             label_function=lambda *a, **kw: "")
            #     rt, intens = self.oxonium_ion_chromatogram
            #     oxonium_axis.set_ylim(0, max(intens) * 1.1)
            #     oxonium_axis.yaxis.tick_right()
            #     oxonium_axis.axes.spines['right'].set_visible(True)
            #     oxonium_axis.set_ylabel("Oxonium Abundance", fontsize=18)
            #     oxonium_ion_artist.draw_generic_chromatogram(
            #         "Oxonium Ions", rt, intens, 'green')
        else:
            ax.text(0.5, 0.5, "No chromatograms extracted",
                    ha='center', fontsize=16)
            ax.axis('off')
        return png_plot(ax, patchless=True, bbox_inches='tight', width=12, height=8)

    def build_chromatograms(self):
        if self.abundance_threshold is None:
            self._estimate_threshold()
        ex = ChromatogramExtractor(
            self.reader, minimum_intensity=self.abundance_threshold,
            minimum_mass=self.minimum_mass)
        self.chromatograms = ex.run()
        self.total_ion_chromatogram = ex.total_ion_chromatogram
        if self.reader.extended_index.msn_ids:
            self.build_oxonium_ion_chromatogram()

    def draw_lcms_map(self):
        if self.abundance_threshold is None:
            self._estimate_threshold()
        ax = figax()
        artist = LCMSMapArtist.from_peak_loader(
            self.reader, threshold=self.abundance_threshold / 2., ax=ax)
        artist.draw()
        return png_plot(ax, patchless=True, bbox_inches='tight', width=10, height=10)
def test_writer(self):
    source_reader = MzMLLoader(self.source_data_path)
    fd, name = tempfile.mkstemp()
    with open(name, 'wb') as fh:
        writer = MzMLSerializer(
            fh, n_spectra=len(source_reader.index), deconvoluted=True)
        description = source_reader.file_description()
        writer.add_file_information(description)
        writer.add_file_contents("profile spectrum")
        writer.add_file_contents("centroid spectrum")
        writer.remove_file_contents("profile spectrum")
        instrument_configs = source_reader.instrument_configuration()
        for config in instrument_configs:
            writer.add_instrument_configuration(config)
        software_list = source_reader.software_list()
        for software in software_list:
            writer.add_software(software)
        data_processing_list = source_reader.data_processing()
        for dp in data_processing_list:
            writer.add_data_processing(dp)
        processing = writer.build_processing_method()
        writer.add_data_processing(processing)
        bunch = next(source_reader)
        bunch.precursor.pick_peaks()
        bunch.precursor.deconvolute()
        for product in bunch.products:
            product.pick_peaks()
            product.deconvolute()
        writer.save(bunch)
        writer.complete()
        fh.flush()
        writer.format()
    source_reader.reset()
    processed_reader = ProcessedMzMLDeserializer(
        _compression.get_opener(writer.handle.name))
    for a, b in zip(source_reader.instrument_configuration(),
                    processed_reader.instrument_configuration()):
        assert a.analyzers == b.analyzers
    for a, b in zip(source_reader, processed_reader):
        assert a.precursor.id == b.precursor.id
        assert (a.precursor.acquisition_information ==
                b.precursor.acquisition_information)
        for an, bn in zip(a.products, b.products):
            assert an.id == bn.id
            assert abs(an.precursor_information.neutral_mass -
                       bn.precursor_information.neutral_mass) < 1e-6
    processed_reader.reset()
    description = processed_reader.file_description()
    assert "profile spectrum" not in description.contents
    assert "centroid spectrum" in description.contents
    sf = description.source_files[0]
    assert 'location' not in sf.parameters
    assert sf.parameters['SHA-1'] == 'a2a091b82f27676da87a6c7d17cc90d2d90b8fbf'
    index = processed_reader.extended_index
    pinfo = index.find_msms_by_precursor_mass(
        ms_deisotope.neutral_mass(562.7397, 2))
    assert len(pinfo) > 0
    processed_reader.close()
    try:
        os.remove(name)
        os.remove(processed_reader._index_file_name)
    except OSError:
        pass
def analyze_glycopeptide_sequences(database_connection, sample_path, hypothesis_identifier,
                                   output_path, analysis_name, grouping_error_tolerance=1.5e-5,
                                   mass_error_tolerance=1e-5, msn_mass_error_tolerance=2e-5,
                                   psm_fdr_threshold=0.05, peak_shape_scoring_model=None,
                                   minimum_oxonium_threshold=0.05, workload_size=1000,
                                   channel=None, **kwargs):
    if peak_shape_scoring_model is None:
        peak_shape_scoring_model = chromatogram_solution.ChromatogramScorer(
            shape_fitter_type=shape_fitter.AdaptiveMultimodalChromatogramShapeFitter)
    database_connection = DatabaseBoundOperation(database_connection)
    if not os.path.exists(sample_path):
        channel.send(Message("Could not locate sample %r" % sample_path, "error"))
        return
    reader = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = reader.sample_run
    try:
        hypothesis = get_by_name_or_id(
            database_connection, GlycopeptideHypothesis, hypothesis_identifier)
    except Exception:
        channel.send(Message("Could not locate hypothesis %r" % hypothesis_identifier, "error"))
        channel.abort("An error occurred during analysis.")
    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)
    analysis_name = validate_analysis_name(None, database_connection.session, analysis_name)
    try:
        analyzer = MzMLGlycopeptideLCMSMSAnalyzer(
            database_connection._original_connection,
            hypothesis.id, sample_path,
            output_path=output_path,
            analysis_name=analysis_name,
            grouping_error_tolerance=grouping_error_tolerance,
            mass_error_tolerance=mass_error_tolerance,
            msn_mass_error_tolerance=msn_mass_error_tolerance,
            psm_fdr_threshold=psm_fdr_threshold,
            peak_shape_scoring_model=peak_shape_scoring_model,
            oxonium_threshold=minimum_oxonium_threshold,
            spectra_chunk_size=workload_size)
        gps, unassigned, target_hits, decoy_hits = analyzer.start()
        analysis = analyzer.analysis
        record = project_analysis.AnalysisRecord(
            name=analysis.name, id=analysis.id, uuid=analysis.uuid,
            path=output_path, analysis_type=analysis.analysis_type,
            hypothesis_uuid=analysis.hypothesis.uuid,
            hypothesis_name=analysis.hypothesis.name,
            sample_name=analysis.parameters['sample_name'],
            user_id=channel.user.id)
        channel.send(Message(record.to_json(), 'new-analysis'))
    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during analysis.")
def open_file(index_file):
    data_file = index_file.rsplit("-", 1)[0]
    reader = ProcessedMzMLDeserializer(data_file, use_index=False)
    reader.read_index_file()
    return reader
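# Usage sketch: open_file assumes the extended-index file sits beside the data
# file and is named by appending a single "-"-delimited suffix to the mzML path
# (e.g. "sample.mzML-idx.json"), so stripping the final "-" component recovers
# the data file. The file name below is a placeholder:
reader = open_file("processed-sample.mzML-idx.json")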
def preprocess(mzml_file, database_connection, averagine=None, start_time=None,
               end_time=None, maximum_charge=None, name=None, msn_averagine=None,
               score_threshold=35., msn_score_threshold=5., missed_peaks=1,
               msn_missed_peaks=1, n_processes=5, storage_path=None,
               extract_only_tandem_envelopes=False, ms1_background_reduction=5.,
               msn_background_reduction=0, ms1_averaging=0, channel=None):
    minimum_charge = 1 if maximum_charge > 0 else -1
    charge_range = (minimum_charge, maximum_charge)

    logger.info("Begin Scan Interpolation")
    loader: RandomAccessScanSource = MSFileLoader(mzml_file)
    if len(loader) == 0:
        channel.abort("Cannot process an empty MS data file")

    start_scan = loader.get_scan_by_time(start_time)
    if start_scan is None:
        start_scan = loader[0]

    # has_ms1_scans() may return None when unknown; only an explicit False
    # should disable tandem envelope extraction.
    if loader.has_ms1_scans() is False:
        extract_only_tandem_envelopes = False

    try:
        start_scan_id = loader._locate_ms1_scan(start_scan).id
    except IndexError:
        start_scan_id = start_scan.id

    end_scan = loader.get_scan_by_time(end_time)
    if end_scan is None:
        end_scan = loader[-1]
    try:
        end_scan_id = loader._locate_ms1_scan(end_scan).id
    except IndexError:
        end_scan_id = end_scan.id

    loader.reset()
    loader.make_iterator(grouped=True)
    first_batch = next(loader)
    # Default guards against a first batch with neither precursor nor products,
    # which would otherwise leave is_profile unbound.
    is_profile = False
    if first_batch.precursor is not None:
        is_profile = first_batch.precursor.is_profile
    elif first_batch.products:
        is_profile = first_batch.products[0].is_profile
    if is_profile:
        logger.info("Spectra are profile")
    else:
        logger.info("Spectra are centroided")

    logger.info("Resolving Sample Name")
    if name is None:
        name = os.path.splitext(os.path.basename(mzml_file))[0]
    name = validate_sample_run_name(None, database_connection, name)

    logger.info("Validating arguments")
    try:
        averagine = validate_averagine(averagine)
    except Exception:
        channel.abort("Could not validate MS1 Averagine %s" % averagine)

    try:
        msn_averagine = validate_averagine(msn_averagine)
    except Exception:
        channel.abort("Could not validate MSn Averagine %s" % msn_averagine)

    if is_profile:
        ms1_peak_picking_args = {
            "transforms": [
                ms_peak_picker.scan_filter.FTICRBaselineRemoval(
                    scale=ms1_background_reduction, window_length=2.),
                ms_peak_picker.scan_filter.SavitskyGolayFilter()
            ],
            'signal_to_noise_threshold': 1.0,
        }
        if ms1_background_reduction == 0:
            ms1_peak_picking_args['transforms'] = []
    else:
        ms1_peak_picking_args = {
            "transforms": [
                ms_peak_picker.scan_filter.FTICRBaselineRemoval(
                    scale=ms1_background_reduction, window_length=2.),
            ]
        }
        if ms1_background_reduction == 0:
            ms1_peak_picking_args['transforms'] = []

    if msn_background_reduction > 0:
        msn_peak_picking_args = {
            "transforms": [
                ms_peak_picker.scan_filter.FTICRBaselineRemoval(
                    scale=msn_background_reduction, window_length=2.),
            ]
        }
    else:
        msn_peak_picking_args = {'transforms': []}

    ms1_deconvolution_args = {
        "scorer": ms_deisotope.scoring.PenalizedMSDeconVFitter(score_threshold, 2.),
        "averagine": averagine,
        "charge_range": charge_range,
        "max_missed_peaks": missed_peaks,
        "truncate_after": SampleConsumer.MS1_ISOTOPIC_PATTERN_WIDTH,
        "ignore_below": SampleConsumer.MS1_IGNORE_BELOW
    }
    msn_deconvolution_args = {
        "scorer": ms_deisotope.scoring.MSDeconVFitter(msn_score_threshold),
        "averagine": msn_averagine,
        "charge_range": charge_range,
        "max_missed_peaks": msn_missed_peaks,
        "truncate_after": SampleConsumer.MSN_ISOTOPIC_PATTERN_WIDTH,
        "ignore_below": SampleConsumer.MSN_IGNORE_BELOW
    }

    consumer = SampleConsumer(
        mzml_file,
        ms1_peak_picking_args=ms1_peak_picking_args,
        ms1_deconvolution_args=ms1_deconvolution_args,
        msn_peak_picking_args=msn_peak_picking_args,
        msn_deconvolution_args=msn_deconvolution_args,
        storage_path=storage_path, sample_name=name,
        start_scan_id=start_scan_id,
        end_scan_id=end_scan_id,
        n_processes=n_processes,
        extract_only_tandem_envelopes=extract_only_tandem_envelopes,
        ms1_averaging=ms1_averaging,
        cache_handler_type=ThreadedMzMLScanCacheHandler)

    try:
        consumer.start()
        logger.info("Updating New Sample Run")
        reader = ProcessedMzMLDeserializer(storage_path, use_index=False)
        reader.read_index_file()
        sample_run_data = reader.sample_run
        if reader.extended_index.msn_ids:
            sample_type = "MS/MS Sample"
        else:
            sample_type = "MS Sample"
        sample_run = sample.SampleRunRecord(
            name=sample_run_data.name,
            uuid=sample_run_data.uuid,
            completed=True, path=storage_path,
            sample_type=sample_type,
            user_id=channel.user.id)
        channel.send(Message(sample_run.to_json(), "new-sample-run"))
    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during preprocessing.")
class GlycopeptideDatabaseSearchReportCreator(ReportCreatorBase):
    def __init__(self, database_path, analysis_id, stream=None, threshold=5,
                 mzml_path=None):
        super(GlycopeptideDatabaseSearchReportCreator, self).__init__(
            database_path, analysis_id, stream)
        self.set_template_loader(os.path.dirname(__file__))
        self.mzml_path = mzml_path
        self.scan_loader = None
        self.threshold = threshold
        self.use_dynamic_display_mode = 0
        self.analysis = self.session.query(serialize.Analysis).get(self.analysis_id)
        self._resolve_hypothesis_id()
        self._build_protein_index()
        self._make_scan_loader()
        self._glycopeptide_counter = 0
        if len(self.protein_index) > 10:
            self.use_dynamic_display_mode = 1

    def _resolve_hypothesis_id(self):
        self.hypothesis_id = self.analysis.hypothesis_id
        hypothesis = self.session.query(serialize.GlycopeptideHypothesis).get(
            self.hypothesis_id)
        if hypothesis is None:
            self.hypothesis_id = 1
            hypothesis = self.session.query(serialize.GlycopeptideHypothesis).get(
                self.hypothesis_id)
            if hypothesis is None:
                raise ValueError("Could not resolve Glycopeptide Hypothesis!")

    def prepare_environment(self):
        super(GlycopeptideDatabaseSearchReportCreator, self).prepare_environment()

    def _build_protein_index(self):
        hypothesis_id = self.hypothesis_id
        theoretical_counts = self.session.query(
            Protein.name, Protein.id, func.count(Glycopeptide.id)).join(
            Glycopeptide).group_by(Protein.id).filter(
            Protein.hypothesis_id == hypothesis_id).all()
        matched_counts = self.session.query(
            Protein.name, Protein.id, func.count(IdentifiedGlycopeptide.id)).join(
            Glycopeptide).join(
            IdentifiedGlycopeptide,
            IdentifiedGlycopeptide.structure_id == Glycopeptide.id).group_by(
            Protein.id).filter(
            IdentifiedGlycopeptide.ms2_score > self.threshold,
            IdentifiedGlycopeptide.analysis_id == self.analysis_id).all()
        listing = []
        index = {}
        for protein_name, protein_id, glycopeptide_count in theoretical_counts:
            index[protein_id] = {
                "protein_name": protein_name,
                "protein_id": protein_id,
            }
        for protein_name, protein_id, glycopeptide_count in matched_counts:
            entry = index[protein_id]
            entry['identified_glycopeptide_count'] = glycopeptide_count
            listing.append(entry)
        self.protein_index = sorted(
            listing, key=lambda x: x["identified_glycopeptide_count"], reverse=True)
        for protein_entry in self.protein_index:
            protein_entry['protein'] = self.session.query(Protein).get(
                protein_entry["protein_id"])
        return self.protein_index

    def _make_scan_loader(self):
        if self.mzml_path is not None:
            if not os.path.exists(self.mzml_path):
                raise IOError("No such file {}".format(self.mzml_path))
            self.scan_loader = ProcessedMzMLDeserializer(self.mzml_path)
        else:
            self.mzml_path = self.analysis.parameters['sample_path']
            if not os.path.exists(self.mzml_path):
                raise IOError((
                    "No such file {}. If {} was relocated, you may need to explicitly pass the"
                    " corrected file path.").format(
                        self.mzml_path,
                        self.database_connection._original_connection))
            self.scan_loader = ProcessedMzMLDeserializer(self.mzml_path)

    def iterglycoproteins(self):
        n = float(len(self.protein_index))
        for i, row in enumerate(self.protein_index, 1):
            protein = row['protein']
            glycopeptides = self.session.query(
                IdentifiedGlycopeptide).join(Glycopeptide).join(
                Protein).filter(
                IdentifiedGlycopeptide.analysis_id == self.analysis_id,
                Glycopeptide.hypothesis_id == self.hypothesis_id,
                IdentifiedGlycopeptide.ms2_score > self.threshold,
                Protein.id == protein.id).all()
            glycoprotein = IdentifiedGlycoprotein(protein, glycopeptides)
            self.status_update(
                "Processing %s (%d/%d) %0.2f%%" % (
                    protein.name, i, n, (i / n * 100)))
            yield i, glycoprotein

    def site_specific_abundance_plots(self, glycoprotein):
        axes = OrderedDict()
        for glyco_type in glycoprotein.glycosylation_types:
            for site in sorted(glycoprotein.glycosylation_sites_for(glyco_type)):
                spanning_site = glycoprotein.site_map[glyco_type][site]
                if len(spanning_site) == 0:
                    continue
                bundle = BundledGlycanComposition.aggregate(spanning_site)
                if len(bundle) == 0:
                    continue
                ax = figax()
                AggregatedAbundanceArtist(
                    bundle, ax=ax,
                    colorizer=glycan_colorizer_type_map[glyco_type]).draw()
                ax.set_title("%s Glycans\nat Site %d" % (glyco_type.name, site + 1,),
                             fontsize=18)
                axes[site, glyco_type] = svguri_plot(ax, bbox_inches='tight')
        return axes

    def draw_glycoforms(self, glycoprotein):
        ax = figax()
        layout = GlycoformLayout(
            glycoprotein, glycoprotein.identified_glycopeptides, ax=ax)
        layout.draw()
        svg = layout.to_svg(scale=2.0, height_padding_scale=1.1)
        return svg

    def chromatogram_plot(self, glycopeptide):
        ax = figax()
        try:
            SmoothingChromatogramArtist(
                glycopeptide, ax=ax, label_peaks=False,
                colorizer=lambda x: "#48afd0").draw(legend=False)
            ax.set_xlabel("Time (Minutes)", fontsize=16)
            ax.set_ylabel("Relative Abundance", fontsize=16)
            return png_plot(ax, bbox_inches='tight', img_height='100%')
        except ValueError:
            return "<div style='text-align:center;'>No Chromatogram Found</div>"

    def spectrum_match_info(self, glycopeptide):
        matched_scans = []
        for solution_set in glycopeptide.spectrum_matches:
            best_solution = solution_set.best_solution()
            try:
                selected_solution = solution_set.solution_for(glycopeptide.structure)
            except KeyError:
                continue
            pass_threshold = abs(selected_solution.score - best_solution.score) < 1e-6
            if not pass_threshold:
                continue
            if isinstance(selected_solution.scan, SpectrumReference):
                scan = self.session.query(MSScan).filter(
                    MSScan.scan_id == selected_solution.scan.id,
                    MSScan.sample_run_id == self.analysis.sample_run_id).first().convert()
            else:
                scan = selected_solution.scan
            scan.score = selected_solution.score
            matched_scans.append(scan)
        spectrum_match_ref = max(glycopeptide.spectrum_matches, key=lambda x: x.score)
        scan_id = spectrum_match_ref.scan.scan_id
        scan = self.scan_loader.get_scan_by_id(scan_id)
        try:
            mass_shift = spectrum_match_ref[0].mass_shift
        except Exception:
            mass_shift = Unmodified
        if mass_shift.name != Unmodified.name:
            mass_shift = mass_shift.convert()
        else:
            mass_shift = Unmodified
        match = CoverageWeightedBinomialScorer.evaluate(
            scan, glycopeptide.structure.convert(),
            error_tolerance=self.analysis.parameters["fragment_error_tolerance"],
            mass_shift=mass_shift)
        specmatch_artist = TidySpectrumMatchAnnotator(match, ax=figax())
        specmatch_artist.draw(fontsize=10, pretty=True)
        annotated_match_ax = specmatch_artist.ax
        scan_title = scan.id
        if len(scan_title) > 60:
            scan_title = '\n'.join(textwrap.wrap(scan_title, 60))
        annotated_match_ax.set_title(scan_title, fontsize=18)
        annotated_match_ax.set_ylabel(annotated_match_ax.get_ylabel(), fontsize=16)
        annotated_match_ax.set_xlabel(annotated_match_ax.get_xlabel(), fontsize=16)
        sequence_logo_plot = glycopeptide_match_logo(match, ax=figax())
        xlim = list(sequence_logo_plot.get_xlim())
        xlim[0] += 1
        sequence_logo_plot.set_xlim(xlim[0], xlim[1])
        spectrum_plot = png_plot(
            annotated_match_ax, svg_width="100%", bbox_inches='tight',
            height=3 * 1.5, width=8 * 1.5, img_width="100%", patchless=True)
        logo_plot = png_plot(
            sequence_logo_plot, svg_width="100%", img_width="100%",
            xml_transform=scale_fix_xml_transform, bbox_inches='tight',
            height=2, width=6 * 1.5, patchless=True)
        return dict(
            spectrum_plot=spectrum_plot, logo_plot=logo_plot,
            precursor_mass_accuracy=match.precursor_mass_accuracy(),
            spectrum_match=match)

    def track_entry(self, glycopeptide):
        self._glycopeptide_counter += 1
        if self._glycopeptide_counter % 15 == 0:
            self.status_update(
                " ... %d glycopeptides handled" % (self._glycopeptide_counter,))
        return self._glycopeptide_counter

    def make_template_stream(self):
        template_obj = self.env.get_template("overview.templ")
        ads = serialize.AnalysisDeserializer(
            self.database_connection._original_connection,
            analysis_id=self.analysis_id)
        hypothesis = ads.analysis.hypothesis
        sample_run = ads.analysis.sample_run
        if self.use_dynamic_display_mode:
            self.status_update("Using dynamic display mode")
        template_stream = template_obj.stream(
            analysis=ads.analysis, hypothesis=hypothesis,
            sample_run=sample_run,
            protein_index=self.protein_index,
            glycoprotein_iterator=self.iterglycoproteins(),
            renderer=self,
            use_dynamic_display_mode=self.use_dynamic_display_mode)
        return template_stream
def analyze_glycan_composition(database_connection, sample_path, hypothesis_identifier,
                               output_path, analysis_name, mass_shifts,
                               grouping_error_tolerance=1.5e-5, mass_error_tolerance=1e-5,
                               scoring_model=None, minimum_mass=500., smoothing_factor=None,
                               regularization_model=None, combinatorial_mass_shift_limit=8,
                               channel=None, **kwargs):
    if scoring_model is None:
        scoring_model = GeneralScorer
    database_connection = DatabaseBoundOperation(database_connection)
    if not os.path.exists(sample_path):
        channel.send(Message("Could not locate sample %r" % sample_path, "error"))
        return
    reader = ProcessedMzMLDeserializer(sample_path, use_index=False)
    sample_run = reader.sample_run
    try:
        hypothesis = get_by_name_or_id(
            database_connection, GlycanHypothesis, hypothesis_identifier)
    except Exception:
        channel.send(Message("Could not locate hypothesis %r" % hypothesis_identifier, "error"))
        return
    if analysis_name is None:
        analysis_name = "%s @ %s" % (sample_run.name, hypothesis.name)
    analysis_name = validate_analysis_name(None, database_connection.session, analysis_name)
    try:
        mass_shift_out = []
        for mass_shift, multiplicity in mass_shifts:
            mass_shift_out.append(validate_mass_shift(mass_shift, multiplicity))
        expanded = MzMLGlycanChromatogramAnalyzer.expand_mass_shifts(
            dict(mass_shift_out), limit=combinatorial_mass_shift_limit)
        mass_shifts = expanded
    except Abort:
        channel.send(Message.traceback())
        return
    try:
        analyzer = MzMLGlycanChromatogramAnalyzer(
            database_connection._original_connection, hypothesis.id,
            sample_path=sample_path, output_path=output_path,
            mass_shifts=mass_shifts,
            mass_error_tolerance=mass_error_tolerance,
            grouping_error_tolerance=grouping_error_tolerance,
            scoring_model=scoring_model,
            analysis_name=analysis_name,
            minimum_mass=minimum_mass)
        analyzer.start()
        analysis = analyzer.analysis
        record = project_analysis.AnalysisRecord(
            name=analysis.name, id=analysis.id, uuid=analysis.uuid,
            path=output_path, analysis_type=analysis.analysis_type,
            hypothesis_uuid=analysis.hypothesis.uuid,
            hypothesis_name=analysis.hypothesis.name,
            sample_name=analysis.parameters['sample_name'],
            user_id=channel.user.id)
        channel.send(Message(record.to_json(), 'new-analysis'))
    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during analysis.")
class GlycopeptideDatabaseSearchReportCreator(ReportCreatorBase):
    def __init__(self, database_path, analysis_id, stream=None, threshold=5,
                 mzml_path=None):
        super(GlycopeptideDatabaseSearchReportCreator, self).__init__(
            database_path, analysis_id, stream)
        self.set_template_loader(os.path.dirname(__file__))
        self.mzml_path = mzml_path
        self.scan_loader = None
        self.threshold = threshold
        self.analysis = self.session.query(serialize.Analysis).get(
            self.analysis_id)
        self._resolve_hypothesis_id()
        self._build_protein_index()
        self._make_scan_loader()
        self._glycopeptide_counter = 0

    def _resolve_hypothesis_id(self):
        self.hypothesis_id = self.analysis.hypothesis_id
        hypothesis = self.session.query(serialize.GlycopeptideHypothesis).get(
            self.hypothesis_id)
        if hypothesis is None:
            self.hypothesis_id = 1
            hypothesis = self.session.query(
                serialize.GlycopeptideHypothesis).get(self.hypothesis_id)
            if hypothesis is None:
                raise ValueError("Could not resolve Glycopeptide Hypothesis!")

    def prepare_environment(self):
        super(GlycopeptideDatabaseSearchReportCreator, self).prepare_environment()

    def _build_protein_index(self):
        hypothesis_id = self.hypothesis_id
        theoretical_counts = self.session.query(
            Protein.name, Protein.id, func.count(Glycopeptide.id)).join(
            Glycopeptide).group_by(Protein.id).filter(
            Protein.hypothesis_id == hypothesis_id).all()
        matched_counts = self.session.query(
            Protein.name, Protein.id, func.count(IdentifiedGlycopeptide.id)).join(
            Glycopeptide).join(
            IdentifiedGlycopeptide,
            IdentifiedGlycopeptide.structure_id == Glycopeptide.id).group_by(
            Protein.id).filter(
            IdentifiedGlycopeptide.ms2_score > self.threshold,
            IdentifiedGlycopeptide.analysis_id == self.analysis_id).all()
        listing = []
        index = {}
        for protein_name, protein_id, glycopeptide_count in theoretical_counts:
            index[protein_id] = {
                "protein_name": protein_name,
                "protein_id": protein_id,
            }
        for protein_name, protein_id, glycopeptide_count in matched_counts:
            entry = index[protein_id]
            entry['identified_glycopeptide_count'] = glycopeptide_count
            listing.append(entry)
        self.protein_index = sorted(
            listing, key=lambda x: x["identified_glycopeptide_count"], reverse=True)
        for protein_entry in self.protein_index:
            protein_entry['protein'] = self.session.query(Protein).get(
                protein_entry["protein_id"])
        return self.protein_index

    def _make_scan_loader(self):
        if self.mzml_path is not None:
            if not os.path.exists(self.mzml_path):
                raise IOError("No such file {}".format(self.mzml_path))
            self.scan_loader = ProcessedMzMLDeserializer(self.mzml_path)
        else:
            self.mzml_path = self.analysis.parameters['sample_path']
            if not os.path.exists(self.mzml_path):
                raise IOError((
                    "No such file {}. If {} was relocated, you may need to explicitly pass the"
                    " corrected file path.").format(
                        self.mzml_path,
                        self.database_connection._original_connection))
            self.scan_loader = ProcessedMzMLDeserializer(self.mzml_path)

    def iterglycoproteins(self):
        n = float(len(self.protein_index))
        for i, row in enumerate(self.protein_index, 1):
            protein = row['protein']
            glycopeptides = self.session.query(IdentifiedGlycopeptide).join(
                Glycopeptide).join(Protein).filter(
                IdentifiedGlycopeptide.analysis_id == self.analysis_id,
                Glycopeptide.hypothesis_id == self.hypothesis_id,
                IdentifiedGlycopeptide.ms2_score > self.threshold,
                Protein.id == protein.id).all()
            glycoprotein = IdentifiedGlycoprotein(protein, glycopeptides)
            glycoprotein.id = protein.id
            self.status_update(
                "Processing %s (%d/%d) %0.2f%%" % (
                    protein.name, i, n, (i / n * 100)))
            yield glycoprotein

    def site_specific_abundance_plots(self, glycoprotein):
        axes = OrderedDict()
        for glyco_type in glycoprotein.glycosylation_types:
            for site in sorted(glycoprotein.glycosylation_sites_for(glyco_type)):
                spanning_site = glycoprotein.site_map[glyco_type][site]
                if len(spanning_site) == 0:
                    continue
                bundle = BundledGlycanComposition.aggregate(spanning_site)
                ax = figax()
                AggregatedAbundanceArtist(
                    bundle, ax=ax,
                    colorizer=glycan_colorizer_type_map[glyco_type]).draw()
                ax.set_title("%s Glycans\nat Site %d" % (glyco_type.name, site, ),
                             fontsize=18)
                axes[site, glyco_type] = svguri_plot(ax, bbox_inches='tight')
        return axes

    def draw_glycoforms(self, glycoprotein):
        ax = figax()
        layout = GlycoformLayout(
            glycoprotein, glycoprotein.identified_glycopeptides, ax=ax)
        layout.draw()
        svg = layout.to_svg(scale=2.0, height_padding_scale=1.1)
        # svg = plot_glycoforms_svg(
        #     glycoprotein, glycoprotein.identified_glycopeptides, ax=ax,
        #     margin_left=85, margin_top=0, height_padding_scale=1.1)
        return svg

    def chromatogram_plot(self, glycopeptide):
        ax = figax()
        try:
            SmoothingChromatogramArtist(
                glycopeptide, ax=ax, label_peaks=False,
                colorizer=lambda x: "#48afd0").draw(legend=False)
            ax.set_xlabel("Time (Minutes)", fontsize=16)
            ax.set_ylabel("Relative Abundance", fontsize=16)
            return png_plot(ax, bbox_inches='tight', img_height='100%')
        except ValueError:
            return "<div style='text-align:center;'>No Chromatogram Found</div>"

    def spectrum_match_info(self, glycopeptide):
        matched_scans = []
        for solution_set in glycopeptide.spectrum_matches:
            best_solution = solution_set.best_solution()
            try:
                selected_solution = solution_set.solution_for(
                    glycopeptide.structure)
            except KeyError:
                continue
            pass_threshold = abs(selected_solution.score - best_solution.score) < 1e-6
            if not pass_threshold:
                continue
            if isinstance(selected_solution.scan, SpectrumReference):
                scan = self.session.query(MSScan).filter(
                    MSScan.scan_id == selected_solution.scan.id,
                    MSScan.sample_run_id == self.analysis.sample_run_id).first().convert()
            else:
                scan = selected_solution.scan
            scan.score = selected_solution.score
            matched_scans.append(scan)
        spectrum_match_ref = max(glycopeptide.spectrum_matches, key=lambda x: x.score)
        scan_id = spectrum_match_ref.scan.scan_id
        scan = self.scan_loader.get_scan_by_id(scan_id)
        match = CoverageWeightedBinomialScorer.evaluate(
            scan, glycopeptide.structure.convert(),
            error_tolerance=self.analysis.parameters["fragment_error_tolerance"])
        specmatch_artist = SpectrumMatchAnnotator(match, ax=figax())
        specmatch_artist.draw(fontsize=10, pretty=True)
        annotated_match_ax = specmatch_artist.ax
        annotated_match_ax.set_title("%s\n" % (scan.id, ), fontsize=18)
        annotated_match_ax.set_ylabel(annotated_match_ax.get_ylabel(), fontsize=16)
        annotated_match_ax.set_xlabel(annotated_match_ax.get_xlabel(), fontsize=16)
        sequence_logo_plot = glycopeptide_match_logo(match, ax=figax())
        xlim = list(sequence_logo_plot.get_xlim())
        xlim[0] += 1
        sequence_logo_plot.set_xlim(xlim[0], xlim[1])
        spectrum_plot = png_plot(
            annotated_match_ax, svg_width="100%", bbox_inches='tight',
            height=3 * 1.5, width=8 * 1.5, img_width="100%", patchless=True)
        logo_plot = png_plot(
            sequence_logo_plot, svg_width="100%", img_width="100%",
            xml_transform=scale_fix_xml_transform, bbox_inches='tight',
            height=2, width=6 * 1.5, patchless=True)
        return dict(
            spectrum_plot=spectrum_plot, logo_plot=logo_plot,
            precursor_mass_accuracy=match.precursor_mass_accuracy(),
            spectrum_match=match)

    def track_entry(self, glycopeptide):
        self._glycopeptide_counter += 1
        if self._glycopeptide_counter % 15 == 0:
            self.status_update(
                " ... %d glycopeptides handled" % (self._glycopeptide_counter, ))
        return self._glycopeptide_counter

    def make_template_stream(self):
        template_obj = self.env.get_template("overview.templ")
        ads = serialize.AnalysisDeserializer(
            self.database_connection._original_connection,
            analysis_id=self.analysis_id)
        hypothesis = ads.analysis.hypothesis
        sample_run = ads.analysis.sample_run
        template_stream = template_obj.stream(
            analysis=ads.analysis, hypothesis=hypothesis,
            sample_run=sample_run,
            protein_index=self.protein_index,
            glycoprotein_iterator=self.iterglycoproteins(),
            renderer=self,
        )
        return template_stream