Python PCRSequence.snps примеры использования

Язык программирования: Python

Пространство имен/Пакет: qtools.lib.bio

Класс/Тип: PCRSequence

Метод/Функция: snps

Примеров на hotexamples.com: 3

Python PCRSequence.snps - 3 примера найдено. Это лучшие примеры Python кода для qtools.lib.bio.PCRSequence.snps, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

snps(3)

Основные методы

snps (3)

Пример #1

Показать файл

Файл: pcr.py Проект: v-makarenko/vtoolsmq

def pcr_sequences_for_amplicon(amplicon, padding_pos5=0, padding_pos3=0, include_snps=False):
    """
    Return the PCRSequence objects representing the specified amplicon.

    One PCRSequence is built for every entry in ``amplicon.cached_sequences``.
    Each is composed of the amplicon itself plus a prefix and a suffix made of
    the padding returned by the cache entry, all on the '+' strand.

    If include_snps=True, each object will include an attribute 'snps', which
    is a list of SNP dictionaries (like the SNPDBCache object, only
    SQLAlchemy-agnostic, and with 'class' available in addition to 'class_').

    :param amplicon: Object exposing a ``cached_sequences`` iterable.
    :param padding_pos5: The amount of padding to prefix the amplicon.  Max MAX_CACHE_PADDING.
    :param padding_pos3: The amount of padding to suffix the amplicon.  Max MAX_CACHE_PADDING.
    :param include_snps: Whether to include the snps attribute on each amplicon.
    :return: List of PCRSequence objects, in cache order.
    :raises ValueError: If either padding is negative or exceeds MAX_CACHE_PADDING.
    """
    # Call-style raise: valid on both Python 2 and Python 3 (the statement
    # form ``raise E, msg`` is a SyntaxError on Python 3).
    if padding_pos5 < 0 or padding_pos5 > MAX_CACHE_PADDING:
        raise ValueError("Illegal padding value: %s" % padding_pos5)

    if padding_pos3 < 0 or padding_pos3 > MAX_CACHE_PADDING:
        raise ValueError("Illegal padding value: %s" % padding_pos3)

    pseqs = []
    for seq in amplicon.cached_sequences:
        main = SimpleGenomeSequence(seq.chromosome, seq.start_pos, seq.end_pos, '+', seq.positive_amplicon)

        # this could be of a different length than requested, so the
        # coordinates below are derived from the actual padding length
        padding_pos5_seq = seq.padding_pos5(padding_pos5, '+')
        padding_pos3_seq = seq.padding_pos3(padding_pos3, '+')

        prefix = SimpleGenomeSequence(seq.chromosome,
                                      seq.start_pos - len(padding_pos5_seq),
                                      seq.start_pos - 1,
                                      '+', padding_pos5_seq)
        suffix = SimpleGenomeSequence(seq.chromosome,
                                      seq.end_pos + 1,
                                      seq.end_pos + len(padding_pos3_seq),
                                      '+', padding_pos3_seq)
        pseq = PCRSequence(main, prefix, suffix)

        if include_snps:
            snps = seq.snps_in_range(padding_pos5=len(padding_pos5_seq), padding_pos3=len(padding_pos3_seq))
            # Copy only the public attributes of each SNP object, dropping
            # underscore-prefixed (e.g. ORM bookkeeping) entries.
            pseq.snps = [dict((k, v) for k, v in snp.__dict__.items() if not k.startswith('_'))
                         for snp in snps]
            for snp in pseq.snps:
                # expose the value under 'class' as well as 'class_'
                snp['class'] = snp['class_']

        pseqs.append(pseq)
    return pseqs

Пример #2

Показать файл

Файл: assay.py Проект: v-makarenko/vtoolsmq

def sequences_snps_for_assay(config, assay, seq_source, snp_source, left_padding=0, right_padding=0, cache=True):
    """
    Return the PCR sequences for an assay, each carrying a ``snps`` attribute.

    If every entry in ``assay.cached_sequences`` reports itself as cached for
    the requested padding, the sequences are rebuilt from those entries and
    returned immediately.  Otherwise the sequences are looked up through
    ``seq_source`` according to ``assay.assay_type``, their SNPs are fetched
    from ``snp_source``, and (when ``cache`` is true) the assay's cache is
    replaced with the fresh results and the session is committed.

    :param config: Passed through to ``dg_seq`` and ``tm_probe``.
    :param assay: The assay to resolve; a falsy value yields an empty list.
    :param seq_source: Provider of sequence lookups.
    :param snp_source: Provider of SNP lookups.
    :param left_padding: Padding requested before the amplicon.
    :param right_padding: Padding requested after the amplicon.
    :param cache: Whether to store freshly computed results on the assay.
    :return: List of sequence objects.
    """
    if not assay:
        return []

    def public_attrs(obj):
        # attribute snapshot without underscore-prefixed entries
        return dict([(key, val) for key, val in obj.__dict__.items() if not key.startswith("_")])

    stored = assay.cached_sequences
    if stored and all(entry.cached(left_padding, right_padding) for entry in stored):
        rebuilt = []
        for entry in stored:
            body = SimpleGenomeSequence(entry.chromosome, entry.start_pos, entry.end_pos, "+", entry.positive_amplicon)
            head = SimpleGenomeSequence(
                entry.chromosome,
                entry.start_pos - left_padding,
                entry.start_pos - 1,
                "+",
                entry.padding_pos5(left_padding, "+"),
            )
            tail = SimpleGenomeSequence(
                entry.chromosome,
                entry.end_pos + 1,
                entry.end_pos + right_padding,
                "+",
                entry.padding_pos3(right_padding, "+"),
            )
            pcr_seq = PCRSequence(body, head, tail)
            # TODO unify SNP object

            pcr_seq.snps = [public_attrs(cached_snp) for cached_snp in entry.snps]
            for snp_dict in pcr_seq.snps:
                snp_dict["class"] = snp_dict["class_"]

            rebuilt.append(pcr_seq)
        return rebuilt

    # Cache miss (or nothing cached): resolve sequences from the sources.
    sequences = []
    kind = assay.assay_type
    if kind == Assay.TYPE_PRIMER:
        sequences = seq_source.sequences_for_primers(assay.primer_fwd, assay.primer_rev, left_padding, right_padding)
    elif kind == Assay.TYPE_LOCATION:
        sequences.append(
            seq_source.sequence_around_loc(
                assay.chromosome, assay.probe_pos, assay.amplicon_width, left_padding, right_padding
            )
        )
    elif kind == Assay.TYPE_SNP:
        # TODO: make SNP object so that access style is same as assay
        for hit in snp_source.snps_by_rsid(assay.snp_rsid):
            if hit["refUCSC"] == "-":  # deletion
                region_start = hit["chromEnd"]
            else:
                region_start = hit["chromStart"] + 1
            sequences.append(
                seq_source.sequence_around_region(
                    hit["chrom"][3:],
                    region_start,
                    hit["chromEnd"],
                    assay.amplicon_width,
                    left_padding,
                    right_padding,
                )
            )

    for found in sequences:
        found.snps = snp_source.snps_in_range(found.chromosome, found.start, found.end)

    if not cache:
        return sequences

    # TODO: library method? -- given PCR object, SNP dict?  or unify display objects
    # to and from their DB representation?
    snp_columns = (
        "bin", "chrom", "chromStart", "chromEnd", "name", "score", "strand",
        "refNCBI", "refUCSC", "observed", "molType", "class", "valid",
        "avHet", "avHetSE", "func", "locType", "weight",
    )
    assay.cached_sequences = []
    for found in sequences:
        row = HG19AssayCache(
            chromosome=found.chromosome,
            start_pos=found.amplicon.start,
            end_pos=found.amplicon.end,
            seq_padding_pos5=left_padding,
            seq_padding_pos3=right_padding,
            positive_sequence=found.merged_positive_sequence.sequence,
        )

        row.amplicon_dg = dg_seq(config, row.positive_amplicon)
        row.amplicon_tm = tm_probe(config, row.positive_amplicon)
        for snp_dict in found.snps:
            fields = {}
            for column in snp_columns:
                # the cache model takes the "class" value as keyword class_
                fields["class_" if column == "class" else column] = snp_dict[column]
            row.snps.append(SNP131AssayCache(**fields))
        assay.cached_sequences.append(row)

    Session.commit()
    return sequences

Пример #3

Показать файл

Файл: cutter.py Проект: v-makarenko/vtoolsmq

    def cut(self):
        """
        Summarize where the selected enzyme cuts the submitted sequence.

        Reads 'left_padding', 'right_padding', 'enzyme', 'assay_id' and
        'positive_sequence' from ``self.form_result``.  When an assay id is
        given, the sequences come from ``pcr_sequences_snps_for_group``;
        otherwise a single PCRSequence is built from the manually entered
        positive sequence with no SNPs.  Only the first sequence is analyzed.

        :return: A dict containing, for every key of the enzyme's cut data, the
                 total number of cut sites counted for that key, plus:
                 'positive_cuts' / 'negative_cuts' (lists of dicts with
                 'offset', 'pos' and 'class'), 're_width_pct' (percentage
                 string), and, only when positive-strand sites exist on both
                 sides of the amplicon and none inside it, 'fragment' (dict
                 with 'len', 'loff', 'roff' and 'gc').
        """
        left_padding = self.form_result['left_padding']
        right_padding = self.form_result['right_padding']
        enzyme = Session.query(Enzyme).get(self.form_result['enzyme'])
        cutseq = enzyme.cutseq

        # TODO change to single amplicon?
        if self.form_result['assay_id']:
            assay = Session.query(SequenceGroup).get(self.form_result['assay_id'])
            amplicon_tuples = pcr_sequences_snps_for_group(assay, padding_pos5=left_padding, padding_pos3=right_padding)
            # flatten the per-amplicon sequence lists into one list
            sequences = []
            for amp, pseqs in amplicon_tuples:
                sequences.extend(pseqs)
        else:
            # no assay selected: wrap the manually entered sequence, without SNPs
            manual_seq = PCRSequence(SimpleGenomeSequence(0, 0, len(self.form_result['positive_sequence'])-1, '+',
                                                          full_sequence=self.form_result['positive_sequence']))
            manual_seq.snps = []
            sequences = [manual_seq]

        # TODO: this is arbitrary
        location_cut_data = self.__enzyme_cut_locations(sequences[0], [enzyme])

        # TODO support multiple sequences, somehow.

        pos_seq = sequences[0].merged_positive_sequence
        total_width = len(pos_seq)
        # width of the recognition sequence as a percentage of the full sequence
        re_width_pct = 100*float(len(cutseq))/total_width

        enzyme_cut_data = location_cut_data[self.form_result['enzyme']]

        positive_matches = []
        negative_matches = []
        return_dict = {}
        # SNP lookup by name, used to resolve the SNP named in each cut entry
        snp_dict = dict([(s['name'], s) for s in sequences[0].snps])
        # keys here are going to be amplicon_cuts, left_cuts and right_cuts, left_cut
        for k, v in sorted(enzyme_cut_data.items()):
            # NOTE(review): v[0] looks like the baseline (no-SNP) cut sites and
            # v[1:] one entry per SNP variant -- confirm against
            # __enzyme_cut_locations.
            blank, original_positives, original_negatives = v[0]
            snp_positives = []
            snp_negatives = []
            cancel_positives = []
            cancel_negatives = []
            for cuts in v[1:]:
                snp_name, shifted_positives, shifted_negatives = cuts
                snp = snp_dict[snp_name]
                # TODO: this is an oversimplification but will probably only result
                # in a shift in a particular restriction site
                #
                # TODO: I think this is sketchy right at the edges, needs to be tested. (> vs >=, etc)
                # TODO: the code could stand to be more compact as well.
                if len(shifted_positives) > len(original_positives):
                    # more positive-strand sites with the SNP: record the
                    # shifted sites whose span contains either SNP endpoint
                    found = False
                    for start, end, strand in shifted_positives:
                        if (snp['chromEnd'] >= pos_seq.start+start-1 and snp['chromEnd'] <= pos_seq.start+end) or \
                           (snp['chromStart'] >= pos_seq.start+start-1 and snp['chromStart'] <= pos_seq.start+end):
                            snp_positives.append((start, end, strand))
                            found = True
                    if not found:
                        pass
                        #raise Exception, "ERROR: additional positive strand restriction site not found by analyzing SNPs"

                elif len(shifted_positives) < len(original_positives):
                    # fewer positive-strand sites with the SNP: mark original
                    # sites that overlap the SNP (or lie inside it) as cancelled
                    found = False
                    for start, end, strand in original_positives:
                        if (snp['chromEnd'] >= pos_seq.start+start-1 and snp['chromEnd'] <= pos_seq.start+end) or \
                           (snp['chromStart'] >= pos_seq.start+start-1 and snp['chromStart'] <= pos_seq.start+end) or \
                           (pos_seq.start+start-1 >= snp['chromStart'] and pos_seq.start+end <= snp['chromEnd']):
                            if (start, end, strand) not in cancel_positives:
                                cancel_positives.append((start, end, strand))
                            found = True
                    if not found:
                        pass
                        #raise Exception, "ERROR: cancelled positive strand restriction site not found by analyzing SNPs"

                # same analysis for the negative strand; positions are
                # measured back from pos_seq.end rather than from pos_seq.start
                if len(shifted_negatives) > len(original_negatives):
                    # find where the new snp is
                    found = False
                    for start, end, strand in shifted_negatives:
                        if (snp['chromEnd'] >= pos_seq.end-(end+1) and snp['chromEnd'] <= pos_seq.end-start) or \
                           (snp['chromStart'] >= pos_seq.end-(end+1) and snp['chromStart'] <= pos_seq.end-start):
                            snp_negatives.append((start, end, strand))
                            found = True
                    if not found:
                        pass
                        # insertion screws you here.
                        #raise Exception, (snp['chromEnd'], snp['chromStart'], pos_seq.end, pos_seq.end-(shifted_negatives[0][1]+1), pos_seq.end-(shifted_negatives[0][0]))
                        #raise Exception, "ERROR: additional negative strand restriction site not found by analyzing SNPs"

                elif len(shifted_negatives) < len(original_negatives):
                    found = False
                    for start, end, strand in original_negatives:
                        if (snp['chromEnd'] >= pos_seq.end-(end+1) and snp['chromEnd'] <= pos_seq.end-start) or \
                           (snp['chromStart'] >= pos_seq.end-(end+1) and snp['chromStart'] <= pos_seq.end-start) or \
                           (pos_seq.end-(end+1) >= snp['chromStart'] and pos_seq.end-start <= snp['chromEnd']):
                            if (start, end, strand) not in cancel_negatives:
                                cancel_negatives.append((start, end, strand))
                            found = True

                    if not found:
                        pass
                        #raise Exception, "ERROR: cancelling negative strand restriction site not found by analyzing SNPs"

            # cancelled sites are reported under their own class, so take them
            # out of the "stable" lists (this mutates the lists from v[0])
            for tup in cancel_positives:
                original_positives.remove(tup)
            for tup in cancel_negatives:
                original_negatives.remove(tup)

            # total number of sites counted for this key: stable + cancelled + SNP-created
            return_dict[k] = len(original_positives) + len(cancel_positives) \
                             + len(original_negatives) + len(cancel_negatives) \
                             + len(snp_positives) + len(snp_negatives)

            # 'pos' is the site position as a percentage of the total width;
            # negative-strand positions are mirrored and shifted by the site width
            for start, end, strand in original_positives:
                positive_matches.append({'offset': start, 'pos': '%s%%' % (start*100.0/total_width), 'class': 'stable_re_site'})

            for start, end, strand in original_negatives:
                negative_matches.append({'offset': start, 'pos': '%s%%' % (100-(start*100.0/total_width)-re_width_pct), 'class': 'stable_re_site'})

            for start, end, strand in snp_positives:
                positive_matches.append({'offset': start, 'pos': '%s%%' % (start*100.0/total_width), 'class': 'snp_re_site'})

            for start, end, strand in snp_negatives:
                negative_matches.append({'offset': start, 'pos': '%s%%' % (100-(start*100.0/total_width)-re_width_pct), 'class': 'snp_re_site'})

            for start, end, strand in cancel_positives:
                positive_matches.append({'offset': start, 'pos': '%s%%' % (start*100.0/total_width), 'class': 'snp_cancel_re_site'})

            for start, end, strand in cancel_negatives:
                negative_matches.append({'offset': start, 'pos': '%s%%' % (100-(start*100.0/total_width)-re_width_pct), 'class': 'snp_cancel_re_site'})

        return_dict['positive_cuts'] = positive_matches
        return_dict['negative_cuts'] = negative_matches
        return_dict['re_width_pct'] = "%s%%" % re_width_pct

        # figure out amplicon position (offsets within the padded sequence)
        amplicon_start = left_padding
        amplicon_end = len(pos_seq) - (right_padding+1)

        # positive-strand sites that end before the amplicon starts
        left_offsets = [match['offset'] for match in positive_matches if match['offset'] < (amplicon_start - len(cutseq))]
        if left_offsets:
            rightmost_left = max(left_offsets)+len(cutseq)
        else:
            rightmost_left = None

        # positive-strand sites that start after the amplicon ends
        right_offsets = [match['offset'] for match in positive_matches if match['offset'] > amplicon_end]
        if right_offsets:
            leftmost_right = min(right_offsets)
        else:
            leftmost_right = None

        amplicon_cuts = [match for match in positive_matches if match['offset'] >= amplicon_start and match['offset'] <= amplicon_end]

        # todo: bug if the cutters are asymmetric and the negative cutsite is shorter (redmine 669)
        # A fragment is reported only when there is a site on each side of the
        # amplicon and no site inside it.
        if rightmost_left is not None and leftmost_right is not None and len(amplicon_cuts) == 0:
            inner_len = leftmost_right - rightmost_left
            inner_seq = pos_seq.sequence[rightmost_left:leftmost_right]
            inner_gc = gc_content(inner_seq)
            left_offset = amplicon_start - rightmost_left
            right_offset = leftmost_right - amplicon_end

            return_dict['fragment'] = {'len': inner_len,
                                       'loff': left_offset,
                                       'roff': right_offset,
                                       'gc': "%.2f%%" % (inner_gc*100)}

        return return_dict