def call_debruijn_graph(self, windows, reads):
    """Build a de-Bruijn graph per window and collect non-trivial haplotypes.

    Args:
      windows: iterable of regions; each must expose `start` and `end` and is
        passed to `self.ref_reader` and to the in-memory SAM reader as a query.
      reads: reads handed to `sam.InMemorySamReader`, which is then queried
        once per processed window.

    Returns:
      A list of `realigner_pb2.CandidateHaplotypes`, one for every processed
      window whose candidate haplotypes are non-empty and differ from
      `[ref]`. Windows wider than `self.config.ws_config.max_window_size`, or
      rejected by `self.ref_reader.is_valid`, are skipped entirely (no graph
      is built and nothing is logged for them).
    """
    reads_index = sam.InMemorySamReader(reads)
    collected = []

    # Build and process a de-Bruijn graph for each window.
    for region in windows:
        too_wide = (
            region.end - region.start > self.config.ws_config.max_window_size
        )
        if too_wide or not self.ref_reader.is_valid(region):
            continue

        reference = self.ref_reader.query(region)
        reads_in_region = list(reads_index.query(region))

        # Only the graph construction itself is timed.
        with timer.Timer() as stopwatch:
            dbg = debruijn_graph.build(
                reference, reads_in_region, self.config.dbg_config
            )
        elapsed = stopwatch.GetDuration()

        # A falsy graph falls back to the reference as the sole haplotype.
        haplotypes = dbg.candidate_haplotypes() if dbg else [reference]

        if haplotypes and haplotypes != [reference]:
            collected.append(
                realigner_pb2.CandidateHaplotypes(
                    span=region, haplotypes=haplotypes
                )
            )

        # Metrics are logged for every processed window, kept or not.
        self.diagnostic_logger.log_graph_metrics(
            region, dbg, haplotypes, elapsed
        )

    return collected
def _initialize(self):
    """Create the readers and helper objects this instance relies on.

    Sets, in order: `ref_reader`, `sam_reader`, `in_memory_sam_reader`,
    `realigner` (only when `self.options.realigner_enabled` is truthy), `pic`,
    `labeler` (only when `in_training_mode(self.options)` is truthy),
    `variant_caller`, `random`, and finally flips `initialized` to True.

    Raises:
      ValueError: if `self.initialized` is already truthy.
    """
    if self.initialized:
        raise ValueError('Cannot initialize this object twice')

    opts = self.options

    # Readers: an indexed FASTA reference, the SAM reader built by the
    # instance's own factory, and an initially empty in-memory SAM reader.
    reference = fasta.IndexedFastaReader(opts.reference_filename)
    self.ref_reader = reference
    self.sam_reader = self._make_sam_reader()
    memory_reader = sam.InMemorySamReader([])
    self.in_memory_sam_reader = memory_reader

    if opts.realigner_enabled:
        self.realigner = realigner.Realigner(opts.realigner_options, reference)

    # The pileup image creator reads from the in-memory reader, not from
    # self.sam_reader.
    self.pic = pileup_image.PileupImageCreator(
        ref_reader=reference,
        sam_reader=memory_reader,
        options=opts.pic_options,
    )

    if in_training_mode(opts):
        self.labeler = self._make_labeler_from_options()

    self.variant_caller = variant_caller.VariantCaller(
        opts.variant_caller_options
    )
    self.random = np.random.RandomState(opts.random_seed)

    # Set last so a failure above leaves the object marked uninitialized.
    self.initialized = True