def get_snps_from_vcf_dict(vcf_dict, ref_seq=rsrs_seq):
    """Convert VCF-derived variant records into mutation objects.

    Each record is a sequence whose last item names the event type and
    whose first two items describe it, for example::

        [8270, [8271, 8272, ..., 8279], 'del']   # deleted positions
        [73, ['G'], 'mism']                      # position, observed allele
        [309, ['CCT'], 'ins']                    # position, inserted bases

    Args:
        vcf_dict: iterable of variant records shaped as above.
        ref_seq: reference sequence, indexed with ``position - 1``; it is
            used to tell transitions from transversions for 'mism' records.

    Returns:
        list: one ``datatypes`` mutation object per recognised record, in
        input order. Records with any other type tag are skipped.
    """
    mutations = []
    for snp in vcf_dict:
        kind = snp[-1]
        if kind == 'del':
            # The deletion spans from the first to the last listed position.
            mut = datatypes.Deletion("%d-%dd" % (snp[1][0], snp[1][-1]))
        elif kind == 'mism':
            # The record only carries the observed allele, so the reference
            # nucleotide is looked up in ref_seq (1-based position).
            ref = ref_seq[snp[0] - 1]
            var = snp[1][0]
            both_pur = ref in consts.PUR and var in consts.PUR
            both_pyr = ref in consts.PYR and var in consts.PYR
            pur_to_pyr = ref in consts.PUR and var in consts.PYR
            pyr_to_pur = ref in consts.PYR and var in consts.PUR
            if both_pur or both_pyr:
                mut = datatypes.Transition(snp[0])
            elif pur_to_pyr or pyr_to_pur:
                mut = datatypes.Transversion("%d%c" % (snp[0], var))
            else:
                mut = datatypes.Unknown(snp[0])
            # Store the observed allele explicitly, as get_snps does for
            # transitions, because ref_seq need not be the default reference.
            mut.change = var
        elif kind == 'ins':
            inserted = snp[1]
            # The inserted bases arrive wrapped in a list (['CCT']);
            # formatting the list itself would embed its repr in the label.
            if isinstance(inserted, (list, tuple)):
                inserted = inserted[0]
            mut = datatypes.Insertion("%d.%s" % (snp[0], inserted))
        else:
            # Unknown type tag: skip it instead of appending the mutation
            # left over from the previous iteration.
            continue
        mutations.append(mut)
    return mutations
def get_snps(rif, inc, start_pos=0, gap='-'):
    """List the differences between two aligned sequences as mutations.

    The alignment is scanned column by column. Positions are reported in
    reference coordinates: 1-based and not counting gap columns in ``rif``.

    Args:
        rif: aligned reference sequence.
        inc: aligned query sequence; it must be at least as long as ``rif``.
        start_pos: index of the first alignment column to examine. The gap
            counter starts at the same value, so that column is reported
            as position 1.
        gap: character marking an alignment gap.

    Returns:
        list: ``datatypes`` mutation objects in alignment order.

        * Transition / Transversion for a mismatch between two non-gap
          characters. If the query character is a key of
          ``consts.ambiguity`` other than 'N', one mutation is emitted for
          each nucleotide it stands for that differs from the reference,
          tagged with ``ambiguity``. Other unclassifiable characters
          yield nothing.
        * Insertion for a run of reference-gap columns, anchored at the
          last reference position before the run.
        * Deletion for a run of query-gap columns, with first and last
          deleted reference positions.
    """

    def substitution(position, ref_nt, alt_nt):
        """Return the Transition/Transversion for ref_nt -> alt_nt, or None."""
        both_pur = ref_nt in consts.PUR and alt_nt in consts.PUR
        both_pyr = ref_nt in consts.PYR and alt_nt in consts.PYR
        if both_pur or both_pyr:
            mut = datatypes.Transition(position)
            # Set explicitly in case the reference genome is not Anderson.
            mut.change = alt_nt
            return mut
        pur_to_pyr = ref_nt in consts.PUR and alt_nt in consts.PYR
        pyr_to_pur = ref_nt in consts.PYR and alt_nt in consts.PUR
        if pur_to_pyr or pyr_to_pur:
            return datatypes.Transversion("%d%c" % (position, alt_nt))
        return None

    # pos_a is the absolute column index in the alignment; n_gaps counts
    # the reference gap columns seen so far, so pos_a - n_gaps + 1 is the
    # 1-based reference position of the current column.
    pos_a = n_gaps = start_pos
    alg_len = len(rif)
    mutations = []

    # Bounded by the alignment length alone: adding start_pos to the bound
    # would index past the end of rif whenever start_pos > 0.
    while pos_a < alg_len:
        x = rif[pos_a]
        y = inc[pos_a]

        if x == y:
            # Identical column; checking one character is enough.
            if x == gap:
                n_gaps += 1
            pos_a += 1

        elif x != gap and y != gap:
            # Substitution between two non-gap characters.
            position = pos_a - n_gaps + 1
            mut = substitution(position, x, y)
            if mut is not None:
                mutations.append(mut)
            elif y != 'N' and y in consts.ambiguity:
                for nt in consts.ambiguity[y]:
                    # Keep only the alleles that differ from the reference.
                    if nt == x:
                        continue
                    mut = substitution(position, x, nt)
                    if mut is not None:
                        mut.ambiguity = y
                        mutations.append(mut)
            # 'N' and anything else unclassifiable is silently ignored.
            pos_a += 1

        elif x == gap:
            # Insertion: consume every consecutive reference-gap column,
            # including the last column of the alignment when the run
            # reaches it.
            pos_i = pos_a - n_gaps
            ins_seq = []
            while pos_a < alg_len and rif[pos_a] == gap:
                if inc[pos_a] != gap:
                    ins_seq.append(inc[pos_a])
                pos_a += 1
                n_gaps += 1
            mutations.append(
                datatypes.Insertion("%d.%s" % (pos_i, ''.join(ins_seq))))

        else:
            # Deletion: consume every consecutive query-gap column. Columns
            # where the reference is also a gap do not advance the
            # reference coordinate.
            pos_d = pos_a - n_gaps + 1
            while pos_a < alg_len and inc[pos_a] == gap:
                if rif[pos_a] == gap:
                    n_gaps += 1
                pos_a += 1
            # pos_a now points just past the run, so pos_a - n_gaps is the
            # reference position of the last deleted base.
            mutations.append(
                datatypes.Deletion("%d-%dd" % (pos_d, pos_a - n_gaps)))

    return mutations
def choose_terminal_mutation(dict_set_start_list):
    """Reduce the events recorded at each position to one terminal mutation.

    Args:
        dict_set_start_list: mapping of position -> sequence of mutation
            events at that position. Index 0 is treated as the most recent
            event (the list is expected to be already reversed).

    Returns:
        tuple: ``(new_list, weird_guys)``.

        * ``new_list`` holds the mutations kept. Positions whose most
          recent event is a Retromutation, and positions with no events,
          contribute nothing.
        * ``weird_guys`` maps every position that has more than one event
          (and no leading Retromutation) to its full event sequence, for
          inspection.
    """
    # Composition rules noted by the original author. They are kept for
    # reference only; the code below does not apply them and instead
    # classifies the most recent allele against RCRS.
    #   A -> G -> T   Transition   -> Transversion = Transversion
    #   A -> T -> C   Transversion -> Transition   = Transversion
    #   A -> T -> G   Transversion -> Transversion = Transition
    #   A -> T -> A   Transition   -> Transition   = Retromutation
    new_list = []
    weird_guys = {}
    for pos_num in dict_set_start_list:
        events = dict_set_start_list[pos_num]
        if not events:
            # Nothing recorded at this position; indexing [0] would fail.
            continue

        latest = events[0]
        if latest.mutation_type() == 'Retromutation':
            # The most recent event reverts the position: discard it.
            continue

        if len(events) == 1:
            new_list.append(latest)
            continue

        # More than one event at this position: record it for inspection
        # and keep only what the most recent event implies.
        weird_guys[pos_num] = events
        try:
            # Special cases found in phylotree, checked in order.
            kinds = [event.mutation_type() for event in events]
            if kinds[0] == 'Deletion' and kinds[1] in ('Transition',
                                                       'Transversion'):
                pass
            elif set(kinds) == {'Insertion'}:
                # Merge stacked insertions, oldest first.
                event_seq = ''.join(event.seq for event in reversed(events))
                new_list.append(
                    datatypes.Insertion("{}.{}".format(pos_num, event_seq)))
            elif (len(events) == 2 and kinds[0] == 'Deletion'
                    and kinds[1] == 'Insertion'):
                pass
            # NOTE(review): the original comment said the allele is compared
            # to the RSRS reference, but the code reads RCRS - confirm.
            elif isTransition(latest.change, RCRS[pos_num - 1]):
                new_list.append(datatypes.Transition("{}".format(pos_num)))
            elif isTransversion(latest.change, RCRS[pos_num - 1]):
                new_list.append(
                    datatypes.Transversion("{}{}".format(pos_num,
                                                         latest.change)))
        except Exception as e:
            # Deliberate best-effort: report the offending entry and go on.
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            print(str(e))
            print(dict_set_start_list)
            print(set([event.mutation_type() for event in events]))
            print("ERROR %s" % (events,))
    return new_list, weird_guys