示例#1
0
def get_snps_from_vcf_dict(vcf_dict, ref_seq=rsrs_seq):
    """Build mutation objects from parsed VCF variant records.

    Each record is a sequence whose last item names the event type.  The
    shapes handled (taken from the inline examples of the original code):

        [8270, [8271, ..., 8279], 'del']  -> Deletion("8271-8279d")
        [73, ['G'], 'mism']               -> Transition / Transversion /
                                             Unknown at position 73
        [309, ['CCT'], 'ins']             -> Insertion("309.CCT")

    Args:
        vcf_dict: iterable of variant records as described above.
        ref_seq: reference sequence; the reference base of a 'mism' record
            is read at index ``position - 1``.  Defaults to the module-level
            ``rsrs_seq``.

    Returns:
        A list of ``datatypes`` mutation objects, in input order.  Records
        with an unrecognised event type are skipped.
    """
    mutations = []
    for snp in vcf_dict:
        kind = snp[-1]
        if kind == 'del':
            # snp[1] lists every deleted position; only the two ends are used.
            mut = datatypes.Deletion("%d-%dd" % (snp[1][0], snp[1][-1]))
        elif kind == 'mism':
            # The record only carries the variant base, so the reference
            # base is looked up in ref_seq (positions are 1-based).
            ref = ref_seq[snp[0] - 1]
            var = snp[1][0]
            ref_is_pur = ref in consts.PUR
            ref_is_pyr = ref in consts.PYR
            var_is_pur = var in consts.PUR
            var_is_pyr = var in consts.PYR
            if (ref_is_pur and var_is_pur) or (ref_is_pyr and var_is_pyr):
                # NOTE(review): get_snps also stores the variant base in
                # mut.change for transitions; here it is not set - confirm
                # whether that is intended when ref_seq is not rCRS.
                mut = datatypes.Transition(snp[0])
            elif (ref_is_pur and var_is_pyr) or (ref_is_pyr and var_is_pur):
                mut = datatypes.Transversion("%d%c" % (snp[0], var))
            else:
                # Either base falls outside consts.PUR / consts.PYR.
                mut = datatypes.Unknown(snp[0])
                mut.change = var
        elif kind == 'ins':
            inserted = snp[1]
            # The inserted sequence arrives wrapped in a list (['CCT']);
            # formatting the list itself would yield "309.['CCT']".
            if isinstance(inserted, (list, tuple)):
                inserted = inserted[0]
            mut = datatypes.Insertion("%d.%s" % (snp[0], inserted))
        else:
            # Unrecognised event type: skip it, so that a stale `mut` from
            # the previous record is never appended a second time.
            continue
        mutations.append(mut)
    return mutations
示例#2
0
def _snp_between(position, ref_base, alt_base):
    """Return the Transition/Transversion for ref_base -> alt_base, or None.

    None is returned when either base is outside consts.PUR / consts.PYR.
    Transitions get the variant base stored in ``change`` (needed when the
    reference genome is not Anderson, per the original comment).
    """
    ref_is_pur = ref_base in consts.PUR
    ref_is_pyr = ref_base in consts.PYR
    alt_is_pur = alt_base in consts.PUR
    alt_is_pyr = alt_base in consts.PYR
    if (ref_is_pur and alt_is_pur) or (ref_is_pyr and alt_is_pyr):
        mut = datatypes.Transition(position)
        mut.change = alt_base
        return mut
    if (ref_is_pur and alt_is_pyr) or (ref_is_pyr and alt_is_pur):
        return datatypes.Transversion("%d%c" % (position, alt_base))
    return None


def get_snps(rif, inc, start_pos=0, gap='-'):
    """List the mutations that turn aligned sequence `rif` into `inc`.

    The two sequences are walked column by column.  Reported positions are
    counted on `rif` with its gap columns removed (1-based).

    Args:
        rif: aligned reference sequence.
        inc: aligned sequence compared against `rif`, same length.
        start_pos: initial value of both the column index and the gap
            counter.
            NOTE(review): the main loop runs up to len(rif) + start_pos, so
            a non-zero value indexes past the end of an ordinary sequence -
            confirm the intended semantics before relying on it.
        gap: character marking an alignment gap.

    Returns:
        A list of ``datatypes`` objects in alignment order:
          * Transition(pos) with ``change`` set to the variant base;
          * Transversion("<pos><base>");
          * for an ambiguity code other than 'N', one Transition or
            Transversion per base of ``consts.ambiguity[code]`` that differs
            from the reference, each with ``ambiguity`` set to the code;
          * Insertion("<pos>.<seq>"), pos being the reference position that
            precedes the inserted bases;
          * Deletion("<first>-<last>d").
        Mismatches that fit none of the above (e.g. 'N') are ignored.
    """
    # pos_a is the absolute column in the alignment; n_gaps counts the gap
    # columns met so far in the reference.
    pos_a = n_gaps = start_pos
    alg_len = len(rif)
    mutations = []
    while pos_a < (alg_len + start_pos):
        x = rif[pos_a]
        y = inc[pos_a]

        if x == y:
            # Identical column; a shared gap still shifts the numbering.
            if x == gap:
                n_gaps += 1
            pos_a += 1
            continue

        if x != gap and y != gap:
            # Substitution.
            position = pos_a - n_gaps + 1
            mut = _snp_between(position, x, y)
            if mut is not None:
                mutations.append(mut)
            elif y in consts.ambiguity and y != 'N':
                for base in consts.ambiguity[y]:
                    # Keep a base implied by the ambiguity code only if it
                    # differs from the reference.
                    if base == x:
                        continue
                    mut = _snp_between(position, x, base)
                    if mut is not None:
                        mut.ambiguity = y
                        mutations.append(mut)
            # Anything else (N, unlisted symbols) is deliberately dropped.
            pos_a += 1
        elif x == gap:
            # Insertion: consume every following column where the reference
            # has a gap.  The bound is alg_len, so a run reaching the last
            # column keeps its final base and a non-gap last column is left
            # for the main loop.
            pos_i = pos_a - n_gaps
            ins_seq = []
            while pos_a < alg_len and rif[pos_a] == gap:
                if inc[pos_a] != gap:
                    ins_seq.append(inc[pos_a])
                pos_a += 1
                n_gaps += 1
            mutations.append(
                datatypes.Insertion("%d.%s" % (pos_i, ''.join(ins_seq))))
        else:
            # Deletion: consume every following column where `inc` has a
            # gap; columns where both have a gap only bump the gap counter.
            pos_d = pos_a - n_gaps + 1
            pos_a += 1
            while pos_a < alg_len and inc[pos_a] == gap:
                if rif[pos_a] == gap:
                    n_gaps += 1
                pos_a += 1
            # pos_a now sits on the first column after the run, so
            # pos_a - n_gaps is the last deleted reference position.
            mutations.append(
                datatypes.Deletion("%d-%dd" % (pos_d, pos_a - n_gaps)))
    return mutations
示例#3
0
def choose_terminal_mutation(dict_set_start_list):
    """Reduce the events recorded at each position to a single mutation.

    Args:
        dict_set_start_list: mapping of position -> list of mutation events
            at that position.  Per the original comments the list is
            reversed, so element 0 is the most recent event.

    Returns:
        A ``(new_list, weird_guys)`` tuple:
          * new_list - the mutations retained across all positions;
          * weird_guys - ``{position: events}`` for every position that
            carries more than one event and does not start with a
            Retromutation.

    Rules applied per position:
      * no events: skipped;
      * most recent event is a Retromutation: nothing is retained;
      * a single event: retained as is;
      * several events: see the inline comments below.  Any exception
        raised while resolving them is printed and the position yields
        nothing (best-effort behaviour kept from the original).
    """
    # Author's notes on how successive events combine (for reference only;
    # the code below compares the latest change against RCRS instead):
    #   A -> G -> T   Transition   -> Transversion = Transversion
    #   A -> T -> C   Transversion -> Transition   = Transversion
    #   A -> T -> G   Transversion -> Transversion = Transition
    #   A -> G -> A   Transition   -> Transition   = Retromutation
    new_list = []
    weird_guys = {}
    for pos_num, events in dict_set_start_list.items():
        if not events:
            # Nothing recorded here; indexing [0] below would raise.
            continue

        kinds = [event.mutation_type() for event in events]

        if kinds[0] == 'Retromutation':
            # The most recent event undoes the change: discard the position.
            continue

        if len(events) == 1:
            new_list.append(events[0])
            continue

        # More than one event and no terminal retromutation.
        weird_guys[pos_num] = events
        try:
            # Special cases found in phylotree.
            if kinds[0] == 'Deletion' and kinds[1] in ('Transition',
                                                       'Transversion'):
                pass
            elif set(kinds) == set(['Insertion']):
                # Only insertions: concatenate their sequences, oldest
                # event first (the list is stored newest first).
                event_seq = ''.join(event.seq for event in reversed(events))
                new_list.append(
                    datatypes.Insertion("{}.{}".format(pos_num, event_seq)))
            elif (len(events) == 2 and kinds[0] == 'Deletion'
                  and kinds[1] == 'Insertion'):
                pass
            elif isTransition(events[0].change, RCRS[pos_num - 1]):
                # Otherwise classify the most recent change against the
                # RCRS base at this position.
                new_list.append(datatypes.Transition("{}".format(pos_num)))
            elif isTransversion(events[0].change, RCRS[pos_num - 1]):
                new_list.append(
                    datatypes.Transversion("{}{}".format(
                        pos_num, events[0].change)))
        except Exception as e:
            # Diagnostics only; single-argument print calls give the same
            # output on Python 2 and Python 3.
            print(str(e))
            print(dict_set_start_list)
            print(set(kinds))
            print("ERROR %s" % (events,))
    return new_list, weird_guys