示例#1
0
文件: models.py 项目: kablag/FastRFLP
 def sites_outside_snp(target_seq, snp_pos, allele_pos_end):
     """Return start indices of recognition sites that do not touch the SNP.

     ``target_seq`` is scanned with the recognition sequence of the enclosing
     scope's ``pe`` and, when that sequence is not its own reverse
     complement, with the reverse complement as well.

     :param target_seq: indexable sequence whose elements are compared to the
         expanded recognition sequence via ``can_recognize``.
     :param snp_pos: position of the first allele base; presumably 1-based
         like ``Snp.snp_pos`` -- confirm against the caller.
     :param allele_pos_end: end position of the allele, same convention as
         ``snp_pos``.
     :return: list of 0-based window start indices into ``target_seq``;
         forward-strand hits come first, reverse-complement hits after them.
         A start index matched by both patterns appears twice.
     """
     recognition_seq = pe.clean_recognition_sequence
     pe_len = len(recognition_seq)
     # Window starts excluded from the scan; the bounds are the original ones.
     snp_window_starts = range(snp_pos - pe_len, allele_pos_end)
     # "+ 1" so that the window ending on the last element of target_seq is
     # tested too; the previous bound silently skipped that final window.
     scan_starts = range(len(target_seq) - pe_len + 1)

     def window_matches(start, pattern):
         # The original try block also covered the can_recognize call, so a
         # DigestError coming out of it keeps meaning "no site here".
         try:
             return all(can_recognize(target_seq[start + offset], pattern[offset])
                        for offset in range(pe_len))
         except fexceps.DigestError:
             return False

     def matching_starts(pattern):
         return [start for start in scan_starts
                 if start not in snp_window_starts
                 and window_matches(start, pattern)]

     sites_outside = matching_starts(expand_sequence(recognition_seq.lower()))
     if recognition_seq != reverse_complement(recognition_seq):
         # Non-palindromic site: the opposite strand needs its own pass.
         pe_seq_rc = expand_sequence(reverse_complement(recognition_seq.lower()))
         sites_outside.extend(matching_starts(pe_seq_rc))
     return sites_outside
示例#2
0
文件: models.py 项目: kablag/FastRFLP
        def pe_can_determine_snp(pe:PrototypeEnzyme,
                                 snp:Snp,
                                 max_num_of_mismatches):
            """Collect the wt and mut sites of ``pe`` that survive cross-checking.

            ``gen_mask`` is run on the wild-type and mutant sequences of
            ``snp`` with the recognition sequence of ``pe`` (plus its reverse
            complement when the sequence is not palindromic).  Every site of
            one allele is then passed through ``test_intercept`` against every
            site of the other allele; a site is dropped as soon as
            ``fexceps.SitesCollisionError`` is raised for it.

            ``max_num_of_mismatches`` is accepted but not used in this body.

            Returns a tuple ``(wt_filtered, mut_filtered)``.
            """
            recognition_seq = pe.clean_recognition_sequence
            pe_len = len(recognition_seq)

            def masks_for(pattern):
                # Wild-type first, mutant second -- same order as the calls
                # were originally written.
                wt_masks = gen_mask(pe_len,
                                    pattern,
                                    snp.ex_wt_sequence,
                                    snp.snp_pos,
                                    snp.wt_pos_end,
                                    snp.wt_allele_len,
                                    )
                mut_masks = gen_mask(pe_len,
                                     pattern,
                                     snp.ex_mut_sequence,
                                     snp.snp_pos,
                                     snp.mut_pos_end,
                                     snp.mut_allele_len,
                                     )
                return wt_masks, mut_masks

            def keep_non_colliding(candidates, opponents):
                survivors = []
                for site in candidates:
                    try:
                        # test_intercept may hand back a replacement site,
                        # which is then checked against the next opponent.
                        for other in opponents:
                            site = test_intercept(site, other)
                    except fexceps.SitesCollisionError:
                        continue
                    survivors.append(site)
                return survivors

            wt_sites, mut_sites = masks_for(expand_sequence(recognition_seq.lower()))
            if recognition_seq != reverse_complement(recognition_seq):
                rc_pattern = expand_sequence(reverse_complement(recognition_seq.lower()))
                rc_wt_sites, rc_mut_sites = masks_for(rc_pattern)
                wt_sites = wt_sites + rc_wt_sites
                mut_sites = mut_sites + rc_mut_sites

            wt_filtered = keep_non_colliding(wt_sites, mut_sites)
            mut_filtered = keep_non_colliding(mut_sites, wt_sites)
            return (wt_filtered, mut_filtered)
示例#3
0
文件: tests.py 项目: kablag/FastRFLP
 def test_init_from_IUPAC(self):
     """A sequence holding one IUPAC code ``Y`` yields a c/t SNP at position 23."""
     snp = Snp("aaaaaaaaaaaaaaaaaaaaaaYaaaaaaaaaaaaaaaaaaaaaaa")
     expected_wt = expand_sequence('aaaaaaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaa')
     expected_mut = expand_sequence('aaaaaaaaaaaaaaaaaaaaaataaaaaaaaaaaaaaaaaaaaaaa')
     # (actual, expected) pairs, checked in the order listed.
     checks = (
         (snp.snp_pos, 23),
         (snp.wt_allele, 'c'),
         (snp.ex_wt_sequence, expected_wt),
         (snp.mut_allele, 't'),
         (snp.ex_mut_sequence, expected_mut),
     )
     for actual, expected in checks:
         self.assertEqual(actual, expected)
示例#4
0
文件: snp.py 项目: kablag/FastRFLP
    def __init__(self, sequence: str, name=None):
        """Build a SNP description from a sequence carrying one SNP marker.

        The input is lower-cased and stripped of every character that is not
        a nucleotide / IUPAC letter, ``[``, ``]`` or ``/``.  Two notations are
        then tried, in this order:

        1. a single ``[wt/mut]`` group, e.g. ``...acgt[a/g]ttca...``;
        2. a single ambiguity code out of ``r y m k s w`` that occurs exactly
           once; its two alleles are taken from ``ambiguity_dict``.

        :param sequence: raw sequence text containing the SNP marker.
        :param name: optional identifier; a ``uuid1`` string is generated when
            it is ``None``.
        :raises fexceps.GetSNPFromSequenceError: when no single SNP marker is
            found, or when the flanking-length check at the end fails.
        """
        def from_IUPAC(sequence: str):
            # One (code, 0-based index, alleles) entry per ambiguity code that
            # occurs exactly once; the result is usable only if there is a
            # single such entry overall.
            snp_positions = [
                (char, sequence.find(char), tuple(ambiguity_dict[char]))
                for char in "rymksw"
                if sequence.count(char) == 1
            ]
            if len(snp_positions) == 1:
                return snp_positions[0]
            return None

        def from_wt_slash_mut(sequence: str):
            # Raw string: "\[" is a regex escape, not a Python string escape.
            snps = re.findall(r'\[[atgc]*/[atgc]*]', sequence)
            if len(snps) == 1:
                marker = snps[0]
                alleles = tuple(marker
                                .replace('[', '')
                                .replace(']', '')
                                .split('/'))
                return marker, sequence.find(marker), alleles
            return None

        self.name = name if name is not None else str(uuid.uuid1())
        # Same character class as before, now written as a raw string so the
        # backslashes no longer form invalid Python escape sequences.
        sequence = re.sub(r'[^atgcrymkswbdhvnATGCRYMKSWBDHVN\[\]\/]',
                          '',
                          sequence.lower())
        info_from_seq = from_wt_slash_mut(sequence)
        if info_from_seq is None:
            info_from_seq = from_IUPAC(sequence)
        if info_from_seq is None:
            raise fexceps.GetSNPFromSequenceError('No valid SNP info in seq: %r' % sequence)

        self.original_snp_sign, self.snp_pos, \
        (self.wt_allele, self.mut_allele) = info_from_seq
        self.snp_pos += 1  # from 0 based to real positions
        self.wt_allele_len = len(self.wt_allele)
        self.wt_pos_end = self.snp_pos + self.wt_allele_len - 1
        self.mut_allele_len = len(self.mut_allele)
        self.mut_pos_end = self.snp_pos + self.mut_allele_len - 1
        # Only the first occurrence of the marker is substituted.
        self.wt_sequence = sequence.replace(self.original_snp_sign,
                                            self.wt_allele, 1)
        self.mut_sequence = sequence.replace(self.original_snp_sign,
                                             self.mut_allele, 1)
        self.ex_wt_sequence = expand_sequence(self.wt_sequence)
        self.ex_mut_sequence = expand_sequence(self.mut_sequence)
        self.digest_penzymes = []
        # Flanking check: start position and the remainder after either
        # allele are each compared against 50.
        if self.snp_pos < 50 or (len(self.wt_sequence) - self.wt_pos_end < 50
                                 or len(self.mut_sequence) - self.mut_pos_end < 50):
            raise fexceps.GetSNPFromSequenceError('Flanking sequence is to short')