plt.close()

# Plot unique MSAs with largest deviations
# Records are kept when their first field is below -1 (the -1 cutoff excludes
# near-zero floating point rounding errors) and their third field lies under
# the parabola a * x**2 evaluated at the first field.
outliers = [record for record in records if record[0] < -1 and record[2] < a * record[0]**2]
outliers.sort()  # Plain tuple ordering, so the smallest first field comes first

OGids = set()  # OGids already drawn; each one is plotted at most once
for record in outliers:
    # Unpack variables
    OGid = record[3]
    if OGid in OGids:
        continue
    OGids.add(OGid)
    msa = record[4]
    regions = record[5]

    # Re-order sequences by their position in tip_order and keep the seq only
    ordered = sorted(msa, key=lambda entry: tip_order[entry[0]])
    msa = [seq for _, seq in ordered]

    # Indicator track over alignment columns: 1 wherever a region applies
    line = np.zeros(len(msa[0]))
    for region in regions:
        line[region] = 1

    # The file name prefix is the zero-based rank of the OGid among those plotted
    plot_msa_lines(msa, line, figsize=(16, 6))
    plt.savefig(f'out/{len(OGids)-1}_{OGid}.png', bbox_inches='tight', dpi=400)
    plt.close()

"""
DEPENDENCIES
../../ortho_tree/ctree_WAG/ctree_WAG.py
../../ortho_tree/ctree_WAG/out/100red_ni.txt
../realign_hmmer1/realign_hmmer1.py
../realign_hmmer1/out/*.mfa
"""
# Tail of the label-loading step, followed by the per-OGid plotting loop.
# NOTE(review): the first two statements continue a loop whose header lies
# above this view, so their nesting cannot be confirmed from here.
#   1. Each (start, stop) pair is cast to int and appended to labels[state];
#      the labels mapping is then stored in OGid2labels under its OGid.
#   2. The out/ directory is created if it does not already exist.
#   3. For every OGid the alignment is read from the realign_hmmer1 output and
#      passed through trim_terminals. When the first state '0' interval starts
#      at column 0, its stop coordinate becomes an offset that is subtracted
#      from every label coordinate; otherwise the offset is 0. (Presumably
#      this compensates for columns removed by trim_terminals -- confirm
#      against its definition.)
#   4. One 0/1 indicator array per state ('1A', '1B', '2', '3') is built over
#      the columns of the first sequence and drawn with plot_msa_lines in the
#      order 1A, 2, 3, 1B over the upper-cased sequences; the figure is saved
#      to out/{OGid}.png and closed.
labels[state].append((int(start), int(stop))) OGid2labels[OGid] = labels if not os.path.exists('out/'): os.mkdir('out/') for OGid, labels in OGid2labels.items(): msa = trim_terminals(read_fasta(f'../../ortho_MSA/realign_hmmer1/out/{OGid}.mfa')) if labels['0'] and labels['0'][0][0] == 0: offset = labels['0'][0][1] else: offset = 0 lines = {} for state in ['1A', '1B', '2', '3']: line = np.zeros(len(msa[0][1])) for start, stop in labels[state]: line[slice(start-offset, stop-offset)] = 1 lines[state] = line plot_msa_lines([seq[1].upper() for seq in msa], [lines['1A'], lines['2'], lines['3'], lines['1B']], figsize=(15, 6)) plt.savefig(f'out/{OGid}.png', bbox_inches='tight') plt.close() """ DEPENDENCIES ../../ortho_MSA/realign_hmmer1/realign_hmmer1.py ../../ortho_MSA/realign_hmmer1/out/*.mfa ../config/segments.tsv """
# Tail of the emission-building loop, then model construction and plotting.
# NOTE(review): the first three statements continue a loop whose header lies
# above this view, so their nesting cannot be confirmed from here.
#   1. emit1 is the sum of the current column `col`; the pair (emit0, emit1)
#      is appended to emits and `col` is kept as col0 (presumably so the next
#      iteration can refer to the previous column -- confirm above).
#   2. One frozen emission distribution per state is built from
#      params['e_dists'] as bernoulli_betabinom_frozen(p, len(msa) - 1, a, b).
#   3. hmm.HMM is instantiated from params['t_dists'], those distributions and
#      params['start_dist'], and forward_backward is run on emits.
#   4. The results for states 1A, 2, 3 and 1B (in that order) are drawn over
#      the upper-cased alignment twice: a (15, 6) figure saved to
#      out/{OGid}_wide.png and an (8, 8) figure saved to out/{OGid}_tall.png.
emit1 = sum(col) emits.append((emit0, emit1)) col0 = col # Instantiate model e_dists_rv = { state: bernoulli_betabinom_frozen(p, len(msa) - 1, a, b) for state, (p, a, b) in params['e_dists'].items() } model = hmm.HMM(params['t_dists'], e_dists_rv, params['start_dist']) # Decode states and plot fbs = model.forward_backward(emits) draw.plot_msa_lines([seq.upper() for _, seq in msa], [fbs['1A'], fbs['2'], fbs['3'], fbs['1B']], figsize=(15, 6)) plt.savefig(f'out/{OGid}_wide.png', bbox_inches='tight') plt.close() draw.plot_msa_lines([seq.upper() for _, seq in msa], [fbs['1A'], fbs['2'], fbs['3'], fbs['1B']], figsize=(8, 8)) plt.savefig(f'out/{OGid}_tall.png', bbox_inches='tight') plt.close() """ DEPENDENCIES ../../ortho_MSA/realign_hmmer1/realign_hmmer1.py ../../ortho_MSA/realign_hmmer1/out/*.mfa ../config/segments.tsv ./fit.py
# Tail of the emission-building branch, then model construction and plotting.
# NOTE(review): the line opens in the middle of an if/else whose condition and
# enclosing loop lie above this view; their nesting cannot be confirmed here.
#   1. Depending on that condition, (j, 1) or (j, 0) is appended to emits.
#   2. Two emission distributions are built: state '0' uses msaBernoulli(ps);
#      state '1' uses msaBernoulli over params['e_param'] repeated len(ps)
#      times.
#   3. hmm.HMM is instantiated from params['t_dists'], those distributions and
#      params['start_dist'], and forward_backward is run on emits.
#   4. The state '1' result is drawn over the upper-cased alignment twice
#      (figsize (15, 6) and (8, 8)). Only rows whose header contains ppid are
#      labelled with it; all other rows get an empty label. The figures are
#      saved as out/{OGid}_{ppid}_wide.png and out/{OGid}_{ppid}_tall.png.
emits.append((j, 1)) else: emits.append((j, 0)) # Instantiate model e_dists_rv = { '0': msaBernoulli(ps), '1': msaBernoulli([params['e_param'] for _ in range(len(ps))]) } model = hmm.HMM(params['t_dists'], e_dists_rv, params['start_dist']) # Decode states and plot fbs = model.forward_backward(emits) draw.plot_msa_lines( [seq.upper() for _, seq in msa], fbs['1'], msa_labels=[ppid if ppid in header else '' for header, _ in msa], msa_labelsize=4, figsize=(15, 6)) plt.savefig(f'out/{OGid}_{ppid}_wide.png', bbox_inches='tight') plt.close() draw.plot_msa_lines( [seq.upper() for _, seq in msa], fbs['1'], msa_labels=[ppid if ppid in header else '' for header, _ in msa], msa_labelsize=4, figsize=(8, 8)) plt.savefig(f'out/{OGid}_{ppid}_tall.png', bbox_inches='tight') plt.close() """ DEPENDENCIES
# Load decoded states
# NOTE(review): `row`, `i`, `label` and `msa` are bound by enclosing code that
# lies above this view.
# The decoded table is tab-separated with a header row; every value is parsed
# as a float and the per-column posterior is the sum of the '2' and '3' fields.
posterior = []
with open(f'../insertion_trim/out/{row.OGid}.tsv') as file:
    header = file.readline().rstrip('\n').split('\t')
    for line in file:
        fields = {key: float(value) for key, value in zip(header, line.rstrip('\n').split('\t'))}
        posterior.append(fields['2'] + fields['3'])
posterior = np.array(posterior)
gradient = np.gradient(posterior)

# Make trim plot
# get_slices returns the column slices to trim; they are rendered as a 0/1
# track drawn alongside the posterior itself.
slices = get_slices(msa, posterior, gradient)
trims = np.zeros(len(posterior))
for s in slices:
    trims[s] = 1

msa = [seq.upper() for _, seq in sorted(msa, key=lambda x: tip_order[x[0]])]  # Re-order sequences and extract seq only
plot_msa_lines(msa, [posterior, trims], figsize=(15, 6))
plt.savefig(f'out/{label}/{i}_{row.OGid}.png', bbox_inches='tight')
plt.close()

"""
DEPENDENCIES
../../ortho_MSA/realign_hmmer2/realign_hmmer2.py
../../ortho_MSA/realign_hmmer2/out/*
../../ortho_tree/ctree_WAG/ctree_WAG.py
../../ortho_tree/ctree_WAG/out/100red_ni.txt
../insertion_trim/decode.py
../insertion_trim/out/*.tsv
"""