# NOTE(review): a second definition of get_ali_entropy_syn follows this one in
# the file and rebinds the name at import time; consider removing one of them.
def get_ali_entropy_syn(alim, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Get the synonymous codon entropy of an alignment at some codon positions

    For every requested codon, the sequences are grouped by the amino acid
    their codon translates to, and the entropy of the codon frequencies
    within each group is computed with get_entropy.

    Parameters:
       - alim: alignment matrix, indexed as alim[sequence, nucleotide]; the
         number of nucleotide columns must be a multiple of 3.
       - positions: codon indices (not nucleotide indices); defaults to all
         codons in the alignment.
       - alpha: not used by this function.
       - VERBOSE: verbosity level; at 3 or more, each codon index is printed.

    Returns:
       Nested dict {codon position: {amino acid: entropy}}.

    Raises:
       ValueError if the alignment length is not a multiple of 3.
    '''
    from collections import defaultdict
    from hivwholeseq.utils.sequence import translate_with_gaps as translate

    # The argument is called alim: the original read an undefined name "ali"
    n_nucleotides = len(alim[0])
    if n_nucleotides % 3:
        raise ValueError('The alignment length is not a multiple of 3')

    if positions is None:
        positions = np.arange(n_nucleotides // 3)

    # The data structure is a nested dict by position and amino acid
    S = {}

    # Iterate over codons
    for pos in positions:
        if VERBOSE >= 3:
            print(pos)

        asub = alim[:, pos * 3: (pos + 1) * 3]

        # Count codons, grouped by the amino acid they encode
        aacount = defaultdict(lambda: defaultdict(int))
        for cod in asub:
            cod = ''.join(cod)
            aacount[translate(cod)][cod] += 1

        Spos = {}
        for aa, codd in aacount.items():
            # list() makes the dict view consumable by numpy on Python 3 too
            af = np.array(list(codd.values()), float)
            af /= af.sum()
            Spos[aa] = get_entropy(af)
        S[pos] = Spos

    return S
def get_ali_entropy_syn(alim, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Get the synonymous codon entropy of an alignment at some codon positions

    At each requested codon, sequences are binned by the amino acid encoded
    by their codon; the codon usage frequencies inside each bin are passed to
    get_entropy.

    Parameters:
       - alim: alignment matrix, indexed as alim[sequence, nucleotide]; the
         number of nucleotide columns must be a multiple of 3.
       - positions: codon indices (not nucleotide indices); defaults to all
         codons in the alignment.
       - alpha: not used by this function.
       - VERBOSE: verbosity level; at 3 or more, each codon index is printed.

    Returns:
       Nested dict {codon position: {amino acid: entropy}}.

    Raises:
       ValueError if the alignment length is not a multiple of 3.
    '''
    from collections import defaultdict
    from hivwholeseq.utils.sequence import translate_with_gaps as translate

    # Use the alim argument: the previous code referenced an undefined "ali"
    ali_length = len(alim[0])
    if ali_length % 3:
        raise ValueError('The alignment length is not a multiple of 3')

    if positions is None:
        positions = np.arange(ali_length // 3)

    # The data structure is a nested dict by position and amino acid
    S = {}

    # Iterate over codons
    for pos in positions:
        if VERBOSE >= 3:
            print(pos)

        asub = alim[:, pos * 3:(pos + 1) * 3]

        # aacount[amino acid][codon] = number of sequences with that codon
        aacount = defaultdict(lambda: defaultdict(int))
        for cod in asub:
            cod = ''.join(cod)
            aacount[translate(cod)][cod] += 1

        Spos = {}
        for aa, codd in aacount.items():
            # Materialize the values so numpy gets a sequence on Python 3 too
            af = np.array(list(codd.values()), float)
            af /= af.sum()
            Spos[aa] = get_entropy(af)
        S[pos] = Spos

    return S
def get_ali_entropy(ali, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Compute the entropy of an alignment at selected columns

    Parameters:
       - ali: alignment; ali[:, pos] must yield the column at pos and
         len(ali[0]) the number of columns.
       - positions: column indices; defaults to every column.
       - alpha: alphabet for the sequences, defaults to ACGT-.
       - VERBOSE: not used by this function.

    Returns:
       The result of get_entropy applied to the frequency matrix, which has
       one row per letter of alpha and one column per position.
    '''
    if positions is None:
        positions = np.arange(len(ali[0]))

    n_letters = len(alpha)
    freqs = np.zeros((n_letters, len(positions)))

    for icol, pos in enumerate(positions):
        # NOTE(review): binary-mode np.fromstring is deprecated in recent
        # numpy; kept as is because the type of ali is not visible here.
        column = np.fromstring(ali[:, pos], 'S1')

        # Occurrences of each alphabet letter, normalized over the letters
        # that belong to alpha (other characters are ignored)
        counts = np.array([(column == letter).sum() for letter in alpha],
                          float)
        freqs[:, icol] = counts / counts.sum()

    return get_entropy(freqs)