Python SeedIndex примеры использования

Язык программирования: Python

Пространство имен/Пакет: biseqt.seeds

Класс/Тип: SeedIndex

Примеров на hotexamples.com: 3

Python SeedIndex - 3 примера найдено. Это лучшие примеры Python кода для biseqt.seeds.SeedIndex, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

index_seeds(1)

score_diagonals(1)

Пример #1

Показать файл

Файл: test_seeds.py Проект: amirkdv/biseqt

def seed_index():
    """Creates a database, a kmer index, and a seed index with word length 5
    stored in memory and returns the seed index. The database is populated with
    3 random sequences of length 100 and all kmers and seeds are indexed."""
    A = Alphabet('ACGT')
    num_seqs = 3
    seq_len = 100
    wordlen = 5

    db = DB(':memory:', A)
    seed_index = SeedIndex(KmerIndex(db, wordlen))
    seed_index.db.initialize()

    fasta = StringIO()
    seqs = (rand_seq(A, seq_len).to_named('#%d' % i) for i in range(num_seqs))
    write_fasta(fasta, seqs)
    fasta.seek(0)

    db.load_fasta(fasta)
    seed_index.index_seeds()
    return seed_index

Пример #2

Показать файл

Файл: mapping.py Проект: amirkdv/biseqt

 def __init__(self, alphabet, wordlen, db_path):
     self.db = DB(db_path, alphabet)
     self.kmer_index = KmerIndex(self.db, wordlen)
     self.seed_index = SeedIndex(self.kmer_index)
     self.bands_indexed = False

Пример #3

Показать файл

Файл: mapping.py Проект: amirkdv/biseqt

class ReadMapper(object):
    def __init__(self, alphabet, wordlen, db_path):
        self.db = DB(db_path, alphabet)
        self.kmer_index = KmerIndex(self.db, wordlen)
        self.seed_index = SeedIndex(self.kmer_index)
        self.bands_indexed = False

    def log(self, *args, **kwargs):
        self.db.log(*args, **kwargs)

    def initialize(self, reads_fa, refs_fa=None, num_reads=-1):
        self.db.initialize()
        with open(reads_fa) as f:
            self.db.load_fasta(f, num=num_reads, rc=True)
        if refs_fa is not None:
            with open(refs_fa) as f:
                self.db.load_fasta(f, rc=False)

    def index_bands(self, **kw):
        self.kmer_index.score_kmers()
        self.seed_index.score_diagonals(**kw)
        self.bands_indexed = True

    def load_reads(self):
        recs_by_content_id = {r.content_id: r for r in list(self.db.find())}
        reads = []
        for record in recs_by_content_id.values():
            if 'rc_of' in record.attrs:
                pair = (recs_by_content_id[record.attrs['rc_of']], record)
                reads.append(Read(self.seed_index, *pair))
        return sorted(reads, key=lambda read: read.record.id)

    def load_refs(self):
        recs_by_content_id = {r.content_id: r for r in list(self.db.find())}
        for record in recs_by_content_id.values():
            if 'rc_of' in record.attrs:
                recs_by_content_id.pop(record.attrs['rc_of'])
                recs_by_content_id.pop(record.content_id)
        return recs_by_content_id.values()

    def map_all_to_all(self, min_band_score, **aligner_kw):
        assert self.bands_indexed, 'Bands must be indexed first'
        self.log('Mapping all reads against each other')
        reads = self.load_reads()  # NOTE comes in sorted order of id
        indic = ProgressIndicator(num_total=len(reads))
        indic.start()
        for read in reads:
            indic.progress()
            # NOTE only compare to reads after us
            others = (r.record for r in reads if r.record.id > read.record.id)
            for other in others:
                rec, target_rec, aln = read.map(other,
                                                min_band_score=min_band_score,
                                                **aligner_kw)
                if rec is None:
                    continue
                yield rec, target_rec, aln
        indic.finish()

    def map_all_to_refs(self, min_band_score, **aligner_kw):
        # FIXME it would be nice to only calculate bands for read v. ref not
        # all pairwise of reads too
        assert self.bands_indexed, 'Bands must be indexed first'
        self.log('Mapping all reads against reference sequences')
        reads, refs = self.load_reads(), self.load_refs()
        indic = ProgressIndicator(num_total=len(reads))
        indic.start()
        for read in reads:
            indic.progress()
            rec, target_rec, aln = read.map(refs,
                                            min_band_score=min_band_score,
                                            **aligner_kw)
            if rec is not None:
                yield rec, target_rec, aln
        indic.finish()

    def mappings_from_sam(self, sampath):
        """Loads mappings from a SAM mapping file and translates sequence
        names to integer identifiers as stored by :class:`biseqt.database.DB`.

        Args:
            db (database.DB): The sequence database where ids are looked up.
            sampath (str): The path to SAM mappings file.

        Yields:
            tuple:
                A 3-tuple containing the read record, the reference name and
                the ``pysam.calignedsegment.AlignedSegment`` mapping it to the
                reference.
        """
        self.log('Loading SAM mappings from %s.' % sampath)
        reads_by_name = {r.record.attrs['name']: r for r in self.load_reads()}
        samfile = pysam.AlignmentFile(sampath)
        for mapping in samfile.fetch():
            qname, rname = mapping.query_name, mapping.reference_name
            # NOTE this is because BLASR does a weird thing with sequence names
            qname = qname.rsplit('/', 1)[0]
            if qname not in reads_by_name:
                continue
            yield reads_by_name[qname], rname, mapping

    def overlaps_from_sam_mappings(self, sampath, min_overlap=-1):
        """Finds all pairs of overlapping sequences based on their mappings to
        a reference.

        Args:
            sampath (str): The path to SAM mappings file.
            min_overlap (int): The minimum required length for overlaps to be
                reported; default is -1 in which case no overlap is excluded.

        Yields:
            tuple:
                A tuple of sequence integer ids (in increasing order) that are
                deemed as overlapping based on SAM mappings.
        """
        self.log('Finding overlaps from SAM mappings.')
        mappings = {read.record.id: (read, ref, mapping)
                    for read, ref, mapping in self.mappings_from_sam(sampath)}
        seqids = sorted(mappings.keys())
        for id0, id1 in combinations(seqids, 2):
            (r0, ref0, map0), (r1, ref1, map1) = mappings[id0], mappings[id1]
            if ref0 != ref1:
                continue
            # TODO ignoring query_alignment_start and query_alignment_end
            overlap_len = min(map0.reference_end, map1.reference_end) - \
                max(map0.reference_start, map1.reference_start)
            if overlap_len <= 0 or overlap_len < min_overlap:
                continue
            # FIXME the second thing we yield is not reported by our own
            # map_all_to_all.
            if map0.is_reverse == map1.is_reverse:
                yield r0.record, r1.record
                yield r0.rc_record, r1.rc_record
            else:
                yield r0.record, r1.rc_record
                yield r0.rc_record, r1.record