示例#1
0
def get_signal(fast5_fn, read_id, scale=True):
    """Return the raw signal for one read stored in a fast5 file.

    The file is opened read-only through ``get_fast5_file`` and closed
    again before any scaling is applied.

    Args:
        fast5_fn: Path of the fast5 file handed to ``get_fast5_file``.
        read_id: Identifier passed to ``get_read`` to select the read.
        scale: When truthy, shift the signal by the first value and divide
            it by the second value of the ``(med, mad)`` pair returned by
            ``mh.med_mad``. When falsy, return the raw data untouched.

    Returns:
        The raw data of the read, normalized if ``scale`` is truthy.
    """
    with get_fast5_file(fast5_fn, 'r') as fast5_fp:
        read = fast5_fp.get_read(read_id)
        signal = read.get_raw_data()

    if not scale:
        return signal

    center, spread = mh.med_mad(signal)
    return (signal - center) / spread
示例#2
0
def get_signal(read, scale=True):
    """Return the raw signal of ``read``, optionally normalized.

    Args:
        read: Object exposing a ``get_raw_data()`` method.
        scale: When truthy, shift the signal by the first value and divide
            it by the second value of the ``(med, mad)`` pair returned by
            ``mh.med_mad``, then cast the result to ``np.float32``. When
            falsy, return the raw data exactly as read.

    Returns:
        The raw data of the read, normalized to float32 if ``scale`` is
        truthy.

    Raises:
        mh.MegaError: If ``read.get_raw_data()`` raises ``IOError``.
    """
    try:
        signal = read.get_raw_data()
    except IOError:
        raise mh.MegaError(
            "Error extracting raw data. Ensure VBZ plugin is "
            "installed (if applicable).")

    if not scale:
        return signal

    center, spread = mh.med_mad(signal)
    normalized = (signal - center) / spread
    return normalized.astype(np.float32)
示例#3
0
    def extract_signal_info(self, fast5_fn, read_id, extract_dacs=False):
        """Load one read and bundle its signal data for this model type.

        Args:
            fast5_fn: Fast5 file name forwarded to ``fast5_io.get_read``.
            read_id: Identifier of the read to load from that file.
            extract_dacs: When truthy, the unscaled signal ("dacs") is
                loaded and stored in the returned signal data.

        Returns:
            A ``(sig_data, seq_summ_info)`` tuple, where ``sig_data`` is a
            ``SIGNAL_DATA`` built for ``self.model_type`` and
            ``seq_summ_info`` comes from ``mh.extract_seq_summary_info``.

        Raises:
            mh.MegaError: If ``self.model_type`` is not one of
                ``TAI_NAME``, ``FAST5_NAME`` or ``PYGUPPY_NAME``.
        """
        read = fast5_io.get_read(fast5_fn, read_id)
        seq_summ_info = mh.extract_seq_summary_info(read)
        model_type = self.model_type

        dacs = None
        scale_params = None
        raw_sig = None
        if extract_dacs:
            # dacs are only kept when requested (e.g. for signal mappings)
            dacs = fast5_io.get_signal(read, scale=False)
            # for pyguppy, scaling and trimming are computed by guppy
            # NOTE(review): the scaled signal computed here is only passed
            # on by the TAI branch below.
            if model_type != PYGUPPY_NAME:
                scale_params = mh.med_mad(dacs)
                shift = scale_params[0]
                spread = scale_params[1]
                raw_sig = (dacs - shift) / spread

        if model_type == TAI_NAME:
            if raw_sig is None:
                raw_sig = fast5_io.get_signal(read, scale=True)
            raw_len = raw_sig.shape[0]
            tai_data = SIGNAL_DATA(
                raw_signal=raw_sig, dacs=dacs, scale_params=scale_params,
                raw_len=raw_len, fast5_fn=fast5_fn, read_id=read_id,
                stride=self.stride)
            return tai_data, seq_summ_info

        if model_type == FAST5_NAME:
            bc_mod_post = fast5_io.get_posteriors(read)
            if extract_dacs:
                # keep only the trimmed region reported for this read
                trim_start, trim_len = fast5_io.get_signal_trim_coordiates(
                    read)
                trim_end = trim_start + trim_len
                dacs = dacs[trim_start:trim_end]
            # raw length is derived from the posterior rows and the stride
            raw_len = bc_mod_post.shape[0] * self.stride
            fast5_data = SIGNAL_DATA(
                raw_len=raw_len, dacs=dacs, fast5_fn=fast5_fn,
                read_id=read_id, stride=self.stride,
                posteriors=bc_mod_post)
            return fast5_data, seq_summ_info

        if model_type == PYGUPPY_NAME:
            # pyguppy always needs the unscaled signal
            if dacs is None:
                dacs = fast5_io.get_signal(read, scale=False)
            raw_len = dacs.shape[0]
            pyguppy_data = SIGNAL_DATA(
                dacs=dacs, raw_len=raw_len, fast5_fn=fast5_fn,
                read_id=read_id, stride=self.stride,
                channel_info=read.get_channel_info())
            return pyguppy_data, seq_summ_info

        raise mh.MegaError('Invalid model type')