Exemplo n.º 1
0
    def __init__(self, input_fofn, fasta_filename=None, prob_threshold=0.03, window_size=5):
        """Set up the QV cache from a FOFN and optionally load sequences.

        Args:
            input_fofn: path to a text file listing one file path per line;
                each non-blank line is registered with the QV cacher via
                `add_bash5`.
            fasta_filename: optional FASTA path. When not None it is passed
                to `self.add_seqs_from_fasta`.
            prob_threshold: stored on the instance as `prob_threshold`
                (how it is used is not visible here -- TODO confirm).
            window_size: stored on the instance as `window_size`
                (how it is used is not visible here -- TODO confirm).
        """
        self.qver = basQVcacher()
        self.input_fofn = input_fofn
        self.seqids = []
        self.prob_threshold = prob_threshold
        self.window_size = window_size
        self.full_prob = None

        with open(self.input_fofn) as f:
            for line in f:
                bas_fn = line.strip()
                # A blank (or trailing empty) line is not a file path;
                # skip it instead of handing '' to add_bash5.
                if bas_fn:
                    self.qver.add_bash5(bas_fn)

        if fasta_filename is not None:
            self.add_seqs_from_fasta(fasta_filename)
Exemplo n.º 2
0
    def __init__(self, input_fofn, fasta_filename=None,
            prob_threshold=.03, window_size=5):
        """Build the QV cacher from `input_fofn`, then load optional FASTA.

        Args:
            input_fofn: path of a file-of-file-names; every non-empty line
                (after stripping whitespace) is handed to
                `basQVcacher.add_bash5`.
            fasta_filename: if not None, forwarded to
                `self.add_seqs_from_fasta` once the cacher is populated.
            prob_threshold: kept as `self.prob_threshold`; its consumer is
                outside this block -- TODO confirm meaning.
            window_size: kept as `self.window_size`; its consumer is
                outside this block -- TODO confirm meaning.
        """
        self.qver = basQVcacher()
        self.input_fofn = input_fofn
        self.seqids = []
        self.prob_threshold = prob_threshold
        self.window_size = window_size
        self.full_prob = None

        with open(self.input_fofn) as f:
            # Strip each line and drop empties so that blank lines in the
            # FOFN are not treated as (empty) file paths.
            entries = (line.strip() for line in f)
            for bas_fn in entries:
                if not bas_fn:
                    continue
                self.qver.add_bash5(bas_fn)

        if fasta_filename is not None:
            self.add_seqs_from_fasta(fasta_filename)
Exemplo n.º 3
0
def ice_fa2fq(in_fa, ccs_fofn, out_fq):
    """Convert an input FASTA file to an output FASTQ file,
       reading QVs from the input ccs.h5 or ccs FOFN.

    Args:
        in_fa: path of the FASTA file to read.
        ccs_fofn: path of a single ".h5" file, or of a FOFN whose entries
            are each registered with the QV cacher. When empty/None no
            QVs are looked up and every base gets a constant QV of 60.
        out_fq: path of the FASTQ file to write.

    Raises:
        ValueError: if the first space-delimited token of a read name is
            not three '/'-separated fields with an integer second field,
            or if the number of QVs differs from the sequence length.
        IOError: if a read cannot be located through the QV cacher.
    """

    qver = basQVcacher()
    if ccs_fofn:
        if ccs_fofn.endswith(".h5"):  # Input is a ccs.h5 file not a FOFN.
            qver.add_bash5(ccs_fofn)
        else:  # Input is a ccs FOFN containing multiple ccs.h5 files.
            for ccs_fn in get_files_from_fofn(ccs_fofn):
                qver.add_bash5(ccs_fn)

    # Readers are opened lazily, one per bas file, and reused across reads.
    bas_handlers = {}

    try:
        with FastaReader(in_fa) as reader, \
                FastqWriter(out_fq) as writer:
            for r in reader:
                seqid = r.name.split(' ')[0]
                try:
                    movie, hn, s_e = seqid.split('/')
                    hn = int(hn)
                except ValueError:
                    raise ValueError("{seqid} is not a valid CCS read".
                                     format(seqid=seqid))
                if ccs_fofn:
                    try:
                        bas_file = qver.bas_files[movie][seqid]
                        if bas_file not in bas_handlers:
                            bas_handlers[bas_file] = BasH5Reader(bas_file)
                    except KeyError:
                        raise IOError("Could not read {s} from input ccs fofn.".
                                      format(s=seqid))
                    # Lazy %-args: the message is only built when DEBUG
                    # logging is actually enabled.
                    logging.debug("Getting QVs for %s ...", r.name)
                    qvs = get_qv_from_bas_handler(
                        bas_handler=bas_handlers[bas_file],
                        hn=hn, s_e=s_e,
                        qv_name="QualityValue")
                else:
                    # No quality values provided: no information given,
                    # so express strong belief in the base calls.
                    qvs = [60] * len(r.sequence)
                if len(r.sequence) != len(qvs):
                    raise ValueError("Sequence and QVs of {r} should be the same!".
                                     format(r=r.name))
                writer.writeRecord(r.name, r.sequence, qvs)
    finally:
        # Close every reader that was opened, even if a read failed
        # part-way through; otherwise the open handles would leak.
        # .items() (not .iteritems()) works on both Python 2 and 3.
        for bas_file, bas_handler in bas_handlers.items():
            logging.debug("Closing %s ...", bas_file)
            bas_handler.close()
Exemplo n.º 4
0
def ice_fa2fq(in_fa, ccs_fofn, out_fq):
    """Convert an input FASTA file to an output FASTQ file,
       reading QVs from the input ccs.h5 or ccs FOFN.

    Args:
        in_fa: path of the FASTA file to read.
        ccs_fofn: path ending in ".h5" (registered directly with the QV
            cacher), or otherwise a FOFN whose entries are each
            registered. Must be a string; it is always consulted.
        out_fq: path of the FASTQ file to write.

    Raises:
        ValueError: if the first space-delimited token of a read name is
            not three '/'-separated fields with an integer second field,
            or if the number of QVs differs from the sequence length.
        IOError: if a read cannot be located through the QV cacher.
    """

    qver = basQVcacher()
    if ccs_fofn.endswith(".h5"):  # Input is a ccs.h5 file not a FOFN.
        qver.add_bash5(ccs_fofn)
    else:  # Input is a ccs FOFN containing multiple ccs.h5 files.
        for ccs_fn in get_files_from_fofn(ccs_fofn):
            qver.add_bash5(ccs_fn)

    # One reader per bas file, created on first use and shared by all
    # reads that live in that file.
    bas_handlers = {}

    try:
        with FastaReader(in_fa) as reader, \
                FastqWriter(out_fq) as writer:
            for r in reader:
                seqid = r.name.split(' ')[0]
                try:
                    movie, hn, s_e = seqid.split('/')
                    hn = int(hn)
                except ValueError:
                    raise ValueError(
                        "{seqid} is not a valid CCS read".format(seqid=seqid))
                try:
                    bas_file = qver.bas_files[movie][seqid]
                    if bas_file not in bas_handlers:
                        bas_handlers[bas_file] = BasH5Reader(bas_file)
                except KeyError:
                    raise IOError(
                        "Could not read {s} from input ccs fofn.".format(s=seqid))
                # Lazy %-args avoid formatting the message per read when
                # DEBUG logging is disabled.
                logging.debug("Getting QVs for %s ...", r.name)
                qvs = get_qv_from_bas_handler(
                    bas_handler=bas_handlers[bas_file],
                    hn=hn,
                    s_e=s_e,
                    qv_name="QualityValue")
                if len(r.sequence) != len(qvs):
                    raise ValueError(
                        "Sequence and QVs of {r} should be the same!".format(
                            r=r.name))
                writer.writeRecord(r.name, r.sequence, qvs)
    finally:
        # Always release the opened readers, including when a read raised
        # above; previously an error left every handler open.
        # .items() (not .iteritems()) works on both Python 2 and 3.
        for bas_file, bas_handler in bas_handlers.items():
            logging.debug("Closing %s ...", bas_file)
            bas_handler.close()