Пример #1
0
class CCSInput(object):
    """
    Wrapper class for handling multiple formats specifying CCS sequences.
    The old convention was to use .fasta, but we would like to be able to pass
    the classifier a ConsensusReadSet (i.e. .bam files) instead for use within
    pbsmrtpipe.

    The reader is chosen from the (case-insensitive) file extension:
    ``.fa``/``.fasta`` -> ``FastaReader``, ``.bam`` -> ``openDataFile``,
    ``.xml`` -> ``openDataSet``.  Instances are iterable, support ``len()``
    and can be used as context managers; item access, assignment and
    deletion are deliberately unsupported.
    """

    # Upper-cased extensions that are read directly with FastaReader.
    _FASTA_EXTS = (".FA", ".FASTA")

    def __init__(self, file_name):
        """Open *file_name* with the reader matching its extension.

        An extension other than .fa/.fasta/.bam/.xml trips the assertion
        below (only while assertions are enabled).
        """
        self.file_name = file_name
        # True when records come from a FASTA reader or a ContigSet and are
        # therefore yielded unchanged by __iter__.
        self._is_fasta = False
        self.ext = op.splitext(file_name)[1].upper()
        if self.ext in self._FASTA_EXTS:
            self._dataset = FastaReader(file_name)
            self._is_fasta = True
        elif self.ext == ".BAM":
            self._dataset = openDataFile(file_name)
        else:  # either contigset.xml or consensusreadset.xml
            assert self.ext == ".XML", (
                "Unsupported CCS input extension %r for %s" %
                (self.ext, file_name))
            self._dataset = openDataSet(file_name)
            if isinstance(self._dataset, ContigSet):
                self._is_fasta = True

    def __iter__(self):
        """Yield one record per entry of the underlying dataset.

        FASTA/ContigSet records are yielded as-is; every other record is
        wrapped as ``CCSBamSequence(rec.peer)``.
        """
        for rec in self._dataset:
            if not self._is_fasta:
                rec = CCSBamSequence(rec.peer)
            yield rec

    def close(self):
        """Close all datasets."""
        self._dataset.close()

    def __enter__(self):
        """Return self so the object can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the dataset on leaving the ``with`` block."""
        self.close()

    def __len__(self):
        """Return the number of records.

        Non-FASTA datasets report their own ``len()``.  FASTA files are
        counted by streaming a fresh reader over the file, which is closed
        again before returning; ContigSets are counted by streaming every
        resource reader.
        """
        if not self._is_fasta:
            return len(self._dataset)
        if self.ext in self._FASTA_EXTS:
            # Use a separate reader so the position of self._dataset is not
            # disturbed, and always release its file handle.
            reader = FastaReader(self.file_name)
            try:
                return sum(1 for _ in reader)
            finally:
                reader.close()
        # contigset: count records without materialising them in a list
        return sum(1
                   for reader in self._dataset.resourceReaders()
                   for _ in reader)

    def __delitem__(self, dummy_name):
        """Item deletion is not supported."""
        raise NotImplementedError("%s.%s" %
                                  (self.__class__.__name__, "__delitem__"))

    def __setitem__(self, dummy_index, dummy_name):
        """Item assignment is not supported."""
        raise NotImplementedError("%s.%s" %
                                  (self.__class__.__name__, "__setitem__"))

    def __getitem__(self, key):
        """Item access is not supported."""
        raise NotImplementedError("%s.%s" %
                                  (self.__class__.__name__, "__getitem__"))
Пример #2
0
class CCSInput(object):
    """
    Wrapper class for handling multiple formats specifying CCS sequences.
    The old convention was to use .fasta, but we would like to be able to pass
    the classifier a ConsensusReadSet (i.e. .bam files) instead for use within
    pbsmrtpipe.

    Which reader backs the wrapper depends on the upper-cased extension of
    the file name: FASTA files use ``FastaReader``, ``.bam`` files use
    ``openDataFile`` and ``.xml`` files use ``openDataSet``.  The wrapper
    can be iterated, measured with ``len()`` and used in a ``with`` block;
    the item protocol methods always raise ``NotImplementedError``.
    """

    # Extensions (upper case) handled by FastaReader.
    _FASTA_EXTS = (".FA", ".FASTA")

    def __init__(self, file_name):
        """Select and open the reader for *file_name*.

        Extensions other than .fa/.fasta/.bam/.xml fail the assertion in
        the final branch (when assertions are enabled).
        """
        self.file_name = file_name
        # Set for FASTA files and ContigSets, whose records need no wrapping.
        self._is_fasta = False
        self.ext = op.splitext(file_name)[1].upper()
        if self.ext in self._FASTA_EXTS:
            self._dataset = FastaReader(file_name)
            self._is_fasta = True
        elif self.ext == ".BAM":
            self._dataset = openDataFile(file_name)
        else: # either contigset.xml or consensusreadset.xml
            assert self.ext == ".XML", (
                "Unsupported CCS input extension %r for %s" %
                (self.ext, file_name))
            self._dataset = openDataSet(file_name)
            if isinstance(self._dataset, ContigSet):
                self._is_fasta = True

    @staticmethod
    def _count(records):
        """Count the items of an iterable without storing them."""
        return sum(1 for _ in records)

    def __iter__(self):
        """Iterate over the records of the dataset.

        Records of FASTA-like inputs pass through untouched; all others are
        converted with ``CCSBamSequence(rec.peer)`` before being yielded.
        """
        for rec in self._dataset:
            if not self._is_fasta:
                rec = CCSBamSequence(rec.peer)
            yield rec

    def close(self):
        """Close all datasets."""
        self._dataset.close()

    def __enter__(self):
        """Enter a ``with`` block; the wrapper itself is the managed object."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave a ``with`` block, closing the underlying dataset."""
        self.close()

    def __len__(self):
        """Return how many records the input holds.

        For non-FASTA datasets this is ``len()`` of the dataset.  A FASTA
        file is counted with a temporary reader that is closed afterwards;
        a ContigSet is counted across all of its resource readers.
        """
        if not self._is_fasta:
            return len(self._dataset)
        if self.ext in self._FASTA_EXTS:
            # A temporary reader keeps self._dataset untouched; close it even
            # if reading fails so the file handle is never leaked.
            counting_reader = FastaReader(self.file_name)
            try:
                return self._count(counting_reader)
            finally:
                counting_reader.close()
        # contigset
        return sum(self._count(rr) for rr in self._dataset.resourceReaders())

    def __delitem__(self, dummy_name):
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError("%s.%s" % (self.__class__.__name__,
                                             "__delitem__"))

    def __setitem__(self, dummy_index, dummy_name):
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError("%s.%s" % (self.__class__.__name__,
                                             "__setitem__"))

    def __getitem__(self, key):
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError("%s.%s" % (self.__class__.__name__,
                                             "__getitem__"))
Пример #3
0
#! /usr/bin/env python
import sys
from pbcore.io import FastaReader

# Extract the longest N-free stretch of the last record in a FASTA file.
#
# Usage: script.py <input.fasta> [output_path]
# The length of the stretch is printed and the stretch itself is written
# (raw, without a FASTA header) to output_path.

# Output file used when no second command-line argument is given.
DEFAULT_OUTPUT_PATH = "human_chr14.fa"


def longest_n_free_segment(sequence):
    """Return the longest piece of *sequence* that contains no 'N'.

    The sequence is split on upper-case 'N'; on a tie the earliest piece
    wins.  An empty string is returned when every piece is empty.
    """
    return max(sequence.split('N'), key=len)


def main(argv=None):
    """Run the script.

    argv -- argument list in sys.argv layout; defaults to sys.argv.
            argv[1] is the input FASTA, the optional argv[2] the output
            path (DEFAULT_OUTPUT_PATH when omitted).
    Exits with a message if the input path is missing or the file has no
    records.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: %s <input.fasta> [output_path]" % argv[0])
    fasta_path = argv[1]
    output_path = argv[2] if len(argv) > 2 else DEFAULT_OUTPUT_PATH

    reader = FastaReader(fasta_path)
    try:
        # Only the last record in the file is used.
        last_record = None
        for last_record in reader:
            pass
        if last_record is None:
            sys.exit("no FASTA records found in %s" % fasta_path)
        sequence = last_record.sequence
    finally:
        reader.close()

    longest = longest_n_free_segment(sequence)
    print(len(longest))

    with open(output_path, "w") as out_file:
        out_file.write(longest)


if __name__ == "__main__":
    main()

Пример #4
0
#! /usr/bin/env python
import sys
from pbcore.io import FastaReader

# Find the longest run without 'N' in the last sequence of a FASTA file,
# print its length and write the run (no FASTA header) to an output file.
#
# Usage: script.py <input.fasta> [output_path]

# Where the run is written unless a second argument overrides it.
DEFAULT_OUTPUT_PATH = "human_chr14.fa"


def longest_n_free_segment(sequence):
    """Return the longest substring of *sequence* between 'N' characters.

    Only upper-case 'N' acts as a separator.  When several substrings share
    the maximum length the first one is returned; the result is "" if the
    sequence is empty or consists solely of 'N'.
    """
    pieces = sequence.split('N')
    return max(pieces, key=len)


def main(argv=None):
    """Entry point of the script.

    argv -- list shaped like sys.argv (used when None): argv[1] names the
            input FASTA file and an optional argv[2] the output file,
            which falls back to DEFAULT_OUTPUT_PATH.
    Terminates with an explanatory message when no input path is supplied
    or the input contains no records.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: %s <input.fasta> [output_path]" % argv[0])
    in_path = argv[1]
    out_path = DEFAULT_OUTPUT_PATH
    if len(argv) > 2:
        out_path = argv[2]

    fasta = FastaReader(in_path)
    try:
        # Walk the whole file; the record left over is the last one.
        record = None
        for record in fasta:
            pass
        if record is None:
            sys.exit("no FASTA records found in %s" % in_path)
        bases = record.sequence
    finally:
        fasta.close()

    best = longest_n_free_segment(bases)
    print(len(best))

    with open(out_path, "w") as handle:
        handle.write(best)


if __name__ == "__main__":
    main()