def run(self):
    self.loader = MSFileLoader(self.mzml_path)
    if self.start_scan is not None:
        self.loader.start_from_scan(self.start_scan)

    count = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans

    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            if end_scan in ids or len(ids) == 0:
                break
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred while fetching scans", e)
            break

    # Signal the consumer that no further batches will arrive.
    if self.no_more_event is not None:
        self.no_more_event.set()
        log_handle.log(
            "All scan IDs have been dealt out (%d scan batches)." % (count, ))
    else:
        self.queue.put(DONE)
def _worker_loop(self):
    has_work = True
    i = 0

    def drain_queue():
        # Pull up to 300 pending batches off the queue without blocking.
        current_work = []
        try:
            while len(current_work) < 300:
                current_work.append(self.queue.get_nowait())
        except QueueEmptyException:
            pass
        if len(current_work) > 5:
            log_handle.log("Drained Write Queue of %d items" % (len(current_work), ))
        return current_work

    while has_work:
        try:
            next_bunch = self.queue.get(True, 1)
            if next_bunch == DONE:
                has_work = False
                continue
            if self.log_inserts and (i % 100 == 0):
                log_handle.log("Saving %r" % (next_bunch[0].id, ))
            self._save_bunch(*next_bunch)
            self.commit_counter += 1 + len(next_bunch[1])
            i += 1

            if self.queue.qsize() > 0:
                current_work = drain_queue()
                for next_bunch in current_work:
                    if next_bunch == DONE:
                        has_work = False
                    else:
                        if self.log_inserts and (i % 100 == 0):
                            log_handle.log("Saving %r" % (next_bunch[0].id, ))
                        self._save_bunch(*next_bunch)
                        self.commit_counter += 1 + len(next_bunch[1])
                        i += 1

            # Periodically flush the scan cache to disk.
            if self.commit_counter - self.last_commit_count > self.commit_interval:
                self.last_commit_count = self.commit_counter
                log_handle.log(
                    "Syncing Scan Cache To Disk (%d items waiting)" % (
                        self.queue.qsize(), ))
                self.serializer.commit()
                if self.serializer.is_sqlite():
                    # Restart the WAL checkpoint so the write-ahead log does
                    # not grow without bound.
                    self.serializer.session.execute(
                        "PRAGMA wal_checkpoint(RESTART);")
                self.serializer.session.expunge_all()
        except QueueEmptyException:
            continue
        except Exception as e:
            log_handle.error(
                "An error occurred while writing scans to disk", e)
    # Final flush once the DONE sentinel has been seen.
    self.serializer.commit()
    self.serializer.session.expunge_all()
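# Illustrative sketch: a minimal, self-contained version of the producer/consumer
# protocol the two methods above rely on. A producer pushes scan batches onto a
# shared queue and terminates the stream with a DONE sentinel, while the consumer
# polls the queue with a timeout until it sees DONE. The names `make_batches` and
# `write_batch` are hypothetical stand-ins for _make_scan_batch() and
# _save_bunch(); only the queue/sentinel wiring is meant to match the code above.
import queue
import threading

DONE = object()  # sentinel marking the end of the stream


def producer(work_queue, make_batches):
    for batch in make_batches():
        work_queue.put(batch)
    work_queue.put(DONE)


def consumer(work_queue, write_batch):
    while True:
        try:
            item = work_queue.get(True, 1)  # block for at most one second
        except queue.Empty:
            continue
        if item is DONE:
            break
        write_batch(item)


if __name__ == "__main__":
    q = queue.Queue()
    threading.Thread(
        target=producer, args=(q, lambda: [[1], [2], [3]])).start()
    consumer(q, print)  # prints [1], [2], [3] then returns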
def create(self):
    for index in self.table.indexes:
        try:
            conn = self.session.connection()
            index.create(conn)
            self.session.commit()
        except (OperationalError, ProgrammingError) as e:
            self.session.rollback()
            log_handle.error(
                "An error occurred during index.create for %r" % index,
                exception=e)
def save_bunch(self, precursor, products):
    try:
        self.serializer.save(ScanBunch(precursor, products), commit=False)
        self.commit_counter += 1 + len(products)
        if self.commit_counter - self.last_commit_count > self.commit_interval:
            self.last_commit_count = self.commit_counter
            self.commit()
    except Exception as e:
        log_handle.error("An error occurred while saving scans", e)
def drop(self):
    for index in self.table.indexes:
        # log_handle.log("Dropping Index %r" % index)
        try:
            conn = self.session.connection()
            index.drop(conn)
            self.session.commit()
        except (OperationalError, ProgrammingError) as e:
            self.session.rollback()
            log_handle.error(
                "An error occurred during index.drop for %r" % index,
                exception=e)
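# Illustrative sketch: the drop()/create() methods above implement the usual
# bulk-load pattern of removing a table's indexes before a large insert and
# rebuilding them afterwards. The snippet below shows the same idea against a
# throwaway in-memory SQLite database; the table and column names are made up
# for the example and are not taken from the source.
from sqlalchemy import Column, Integer, MetaData, Table, create_engine

engine = create_engine("sqlite://")
metadata = MetaData()
scans = Table(
    "scans", metadata,
    Column("id", Integer, primary_key=True),
    Column("scan_time", Integer, index=True))
metadata.create_all(engine)

with engine.begin() as conn:
    for index in scans.indexes:
        index.drop(conn)       # inserts are cheaper while the index is gone
    conn.execute(
        scans.insert(),
        [{"id": i, "scan_time": i * 10} for i in range(1000)])
    for index in scans.indexes:
        index.create(conn)     # rebuild once the bulk load is finished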
def _worker_loop(self):
    has_work = True
    i = 0

    def drain_queue():
        current_work = []
        try:
            while len(current_work) < 300:
                current_work.append(self.queue.get_nowait())
        except QueueEmptyException:
            pass
        if len(current_work) > 5:
            log_handle.log("Drained Write Queue of %d items" % (len(current_work), ))
        return current_work

    while has_work:
        try:
            next_bunch = self.queue.get(True, 1)
            i += 1
            if next_bunch == DONE:
                has_work = False
                continue
            # log_handle.log("Writing %s %f (%d, %d)" % (
            #     next_bunch[0].id,
            #     next_bunch[0].scan_time,
            #     len(next_bunch[0].deconvoluted_peak_set) + sum(
            #         [len(p.deconvoluted_peak_set) for p in next_bunch[1]]),
            #     self.queue.qsize()))
            self._save_bunch(*next_bunch)

            if self.queue.qsize() > 0:
                current_work = drain_queue()
                for next_bunch in current_work:
                    i += 1
                    if next_bunch == DONE:
                        has_work = False
                    else:
                        # log_handle.log("Writing %s %f (%d, %d)" % (
                        #     next_bunch[0].id,
                        #     next_bunch[0].scan_time,
                        #     len(next_bunch[0].deconvoluted_peak_set) + sum(
                        #         [len(p.deconvoluted_peak_set) for p in next_bunch[1]]),
                        #     self.queue.qsize()))
                        self._save_bunch(*next_bunch)
                        i += 1
        except QueueEmptyException:
            continue
        except Exception as e:
            log_handle.error(
                "An error occurred while writing scans to disk", e)
def _worker_loop(self):
    has_work = True
    i = 0

    def drain_queue():
        current_work = []
        try:
            while len(current_work) < 300:
                current_work.append(self.queue.get_nowait())
        except QueueEmptyException:
            pass
        if len(current_work) > 5:
            log_handle.log("Drained Write Queue of %d items" % (len(current_work), ))
        return current_work

    while has_work:
        try:
            next_bunch = self.queue.get(True, 1)
            i += 1
            if next_bunch == DONE:
                has_work = False
                continue
            self._save_bunch(*next_bunch)

            if self.queue.qsize() > 0:
                current_work = drain_queue()
                for next_bunch in current_work:
                    i += 1
                    if next_bunch == DONE:
                        has_work = False
                    else:
                        self._save_bunch(*next_bunch)
                        i += 1
        except QueueEmptyException:
            continue
        except Exception as e:
            log_handle.error(
                "An error occurred while writing scans to disk", e)
def _make_scan_batch(self):
    batch = []
    scan_ids = []
    for _i in range(self.batch_size):
        try:
            bunch = next(self.loader)
            scan, products = bunch
            if scan is not None:
                scan_id = scan.id
            else:
                scan_id = None
            product_scan_ids = [p.id for p in products]
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred in _make_scan_batch", e)
            break
        if not self.ignore_tandem_scans:
            batch.append((scan_id, product_scan_ids, True))
        else:
            batch.append((scan_id, product_scan_ids, False))
        scan_ids.append(scan_id)
    return batch, scan_ids
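# Illustrative sketch: the shape of the values _make_scan_batch() returns and
# run() pushes onto the queue. Each batch entry is a tuple of
# (precursor_scan_id, [product_scan_ids], process_msn_flag), with the flag set
# to False when tandem scans are being ignored. The ids below are made-up
# placeholders, not real scan identifiers.
example_batch = [
    ("scan=1001", ["scan=1002", "scan=1003"], True),
    ("scan=1004", ["scan=1005"], True),
]
example_scan_ids = ["scan=1001", "scan=1004"]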
def run(self):
    self.loader = MSFileLoader(
        self.ms_file_path, huge_tree=huge_tree, decode_binary=False)
    if self.start_scan is not None:
        try:
            self.loader.start_from_scan(
                self.start_scan,
                require_ms1=self.loader.has_ms1_scans(),
                grouped=True)
        except IndexError as e:
            log_handle.error("An error occurred while locating start scan", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
        except AttributeError as e:
            log_handle.error(
                "The reader does not support random access, start time will be ignored", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
    else:
        self.loader.make_iterator(grouped=True)

    count = 0
    last = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans

    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            # Periodically wait for the consumer to catch up so the queue
            # does not grow without bound.
            if (count - last) > 1000:
                last = count
                self.queue.join()
            if (end_scan in ids and end_scan is not None) or len(ids) == 0:
                log_handle.log("End Scan Found")
                break
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred while fetching scans", e)
            break

    # Signal the consumer that no further batches will arrive.
    if self.no_more_event is not None:
        self.no_more_event.set()
        log_handle.log(
            "All scan IDs have been dealt out (%d scan batches)." % (count, ))
    else:
        self.queue.put(DONE)
def configure_storage(cls, path=None, name=None, source=None):
    if path is not None:
        if name is None:
            sample_name = os.path.basename(path)
        else:
            sample_name = name
    else:
        path = "processed.mzML"
        # Fall back to the provided name so sample_name is always defined.
        sample_name = name
    if source is not None:
        reader = MSFileLoader(source.scan_source)
        n_spectra = len(reader.index)
        deconvoluting = source.deconvoluting
        inst = cls(path, sample_name, n_spectra=n_spectra,
                   deconvoluted=deconvoluting)
        try:
            description = reader.file_description()
        except AttributeError:
            description = FileInformation()
        source_file_metadata = MetadataSourceFile.from_path(source.scan_source)
        inst.serializer.add_file_information(description)
        try:
            inst.serializer.remove_file_contents("profile spectrum")
        except KeyError:
            pass
        inst.serializer.add_file_contents("centroid spectrum")
        if source_file_metadata not in description.source_files:
            inst.serializer.add_source_file(source_file_metadata)
        try:
            instrument_configs = reader.instrument_configuration()
            for config in instrument_configs:
                inst.serializer.add_instrument_configuration(config)
        except Exception as e:
            log_handle.error(
                "An error occurred while writing instrument configuration", e)
        for trans in source.ms1_peak_picking_args.get("transforms"):
            inst.register_parameter(
                "parameter: ms1-%s" % trans.__class__.__name__, repr(trans))
        if deconvoluting:
            if source.ms1_deconvolution_args.get("averagine"):
                inst.register_parameter(
                    "parameter: ms1-averagine",
                    repr(source.ms1_deconvolution_args.get("averagine")))
            if source.ms1_deconvolution_args.get("scorer"):
                inst.register_parameter(
                    "parameter: ms1-scorer",
                    repr(source.ms1_deconvolution_args.get("scorer")))
        if source.ms1_averaging > 0:
            inst.register_parameter(
                "parameter: ms1-averaging", repr(source.ms1_averaging))
        if source.ignore_tandem_scans:
            inst.register_parameter("parameter: ignore-tandem-scans", "")
        if source.extract_only_tandem_envelopes:
            inst.register_parameter(
                "parameter: extract-only-tandem-envelopes", "")
        if source.msn_peak_picking_args is not None:
            for trans in source.msn_peak_picking_args.get("transforms"):
                inst.register_parameter(
                    "parameter: msn-%s" % trans.__class__.__name__, repr(trans))
        if deconvoluting:
            if source.msn_deconvolution_args.get("averagine"):
                inst.register_parameter(
                    "parameter: msn-averagine",
                    repr(source.msn_deconvolution_args.get("averagine")))
            if source.msn_deconvolution_args.get("scorer"):
                inst.register_parameter(
                    "parameter: msn-scorer",
                    repr(source.msn_deconvolution_args.get("scorer")))
        data_processing = inst.serializer.build_processing_method()
        inst.serializer.add_data_processing(data_processing)
    else:
        n_spectra = 2e5
        inst = cls(path, sample_name, n_spectra=n_spectra)
    # Force marshalling of controlled vocabularies early.
    inst.serializer.writer.param("32-bit float")
    return inst