def get_signal(fast5_fn, read_id, scale=True):
    """Load the raw signal of a single read from a FAST5 file.

    Args:
        fast5_fn: Path of the FAST5 file, opened in ``'r'`` mode via
            ``get_fast5_file``.
        read_id: Identifier passed to ``get_read`` to select the read.
        scale: When true, return ``(signal - med) / mad`` using the pair
            returned by ``mh.med_mad``; when false, return the raw data
            unchanged.

    Returns:
        The raw signal as returned by ``get_raw_data``, optionally scaled.
    """
    # NOTE(review): the visible source defines another ``get_signal`` with a
    # different signature right after this one; if both live in the same
    # module the later definition shadows this one -- confirm which is used.
    with get_fast5_file(fast5_fn, 'r') as fast5_fp:
        signal = fast5_fp.get_read(read_id).get_raw_data()

    if not scale:
        return signal

    # presumably median / median-absolute-deviation -- verify in mh.med_mad
    center, spread = mh.med_mad(signal)
    return (signal - center) / spread
def get_signal(read, scale=True):
    """Return the raw signal of an already-opened read.

    Args:
        read: Object exposing ``get_raw_data()``.
        scale: When true, return ``(signal - med) / mad`` (values from
            ``mh.med_mad``) cast to ``np.float32``; when false, return the
            raw data exactly as ``get_raw_data`` produced it.

    Returns:
        The raw signal, optionally scaled to float32.

    Raises:
        mh.MegaError: If ``get_raw_data`` raises ``IOError``; the message
            points at a possibly missing VBZ plugin.
    """
    try:
        signal = read.get_raw_data()
    except IOError:
        raise mh.MegaError(
            "Error extracting raw data. Ensure VBZ plugin is "
            "installed (if applicable).")

    if not scale:
        return signal

    # presumably median / median-absolute-deviation -- verify in mh.med_mad
    center, spread = mh.med_mad(signal)
    normalized = (signal - center) / spread
    return normalized.astype(np.float32)
def extract_signal_info(self, fast5_fn, read_id, extract_dacs=False):
    """Load a read and bundle its signal data for the configured model type.

    Args:
        fast5_fn: FAST5 file name, forwarded to ``fast5_io.get_read`` and
            stored in the returned ``SIGNAL_DATA``.
        read_id: Read identifier, forwarded likewise.
        extract_dacs: When true, the unscaled signal ("dacs") is loaded up
            front and included in the result.

    Returns:
        Tuple ``(sig_data, seq_summ_info)`` where ``sig_data`` is a
        ``SIGNAL_DATA`` whose populated fields depend on ``self.model_type``
        and ``seq_summ_info`` comes from ``mh.extract_seq_summary_info``.

    Raises:
        mh.MegaError: If ``self.model_type`` is none of ``TAI_NAME``,
            ``FAST5_NAME`` or ``PYGUPPY_NAME``.
    """
    read = fast5_io.get_read(fast5_fn, read_id)
    seq_summ_info = mh.extract_seq_summary_info(read)

    dacs = None
    scale_params = None
    raw_sig = None
    if extract_dacs:
        # dacs are only kept when requested (i.e. when processing signal
        # mappings); otherwise they are not saved.
        dacs = fast5_io.get_signal(read, scale=False)
        # For the pyguppy backend, scale parameters and trimming are
        # computed by guppy, so local scaling is skipped there.
        if self.model_type != PYGUPPY_NAME:
            scale_params = mh.med_mad(dacs)
            shift = scale_params[0]
            spread = scale_params[1]
            raw_sig = (dacs - shift) / spread

    if self.model_type == TAI_NAME:
        # Scaled signal is required here; load it if not derived above.
        if raw_sig is None:
            raw_sig = fast5_io.get_signal(read, scale=True)
        sig_data = SIGNAL_DATA(
            raw_signal=raw_sig,
            dacs=dacs,
            scale_params=scale_params,
            raw_len=raw_sig.shape[0],
            fast5_fn=fast5_fn,
            read_id=read_id,
            stride=self.stride)
        return sig_data, seq_summ_info

    if self.model_type == FAST5_NAME:
        posteriors = fast5_io.get_posteriors(read)
        if extract_dacs:
            # Restrict dacs to the trimmed region reported for this read.
            trim_start, trim_len = fast5_io.get_signal_trim_coordiates(read)
            trim_end = trim_start + trim_len
            dacs = dacs[trim_start:trim_end]
        sig_data = SIGNAL_DATA(
            raw_len=posteriors.shape[0] * self.stride,
            dacs=dacs,
            fast5_fn=fast5_fn,
            read_id=read_id,
            stride=self.stride,
            posteriors=posteriors)
        return sig_data, seq_summ_info

    if self.model_type == PYGUPPY_NAME:
        # This branch always needs the unscaled signal, requested or not.
        if dacs is None:
            dacs = fast5_io.get_signal(read, scale=False)
        sig_data = SIGNAL_DATA(
            dacs=dacs,
            raw_len=dacs.shape[0],
            fast5_fn=fast5_fn,
            read_id=read_id,
            stride=self.stride,
            channel_info=read.get_channel_info())
        return sig_data, seq_summ_info

    raise mh.MegaError('Invalid model type')