# Give every internal node a 1-based numeric name so that it can be
# referred to in the files written below.
for ni, node in enumerate(prediction.non_terminals):
    node.name = str(ni + 1)

# write tree to file
Phylo.write(prediction.T, dirname + '/reconstructed_tree.nwk', 'newick')

# write inferred ancestral sequences to file (one FASTA record per
# internal node, keyed by the numeric name assigned above)
with open(dirname + '/ancestral_sequences.fasta', 'w') as outfile:
    for node in prediction.non_terminals:
        outfile.write('>' + node.name + '\n' + str(node.seq) + '\n')

## write sequence ranking to file
# The last column of both tables is sqrt(var_fitness), i.e. a standard
# deviation, so both tables carry the 'standard dev' column label.
ranking_header = '#' + '\t'.join(['name', 'rank', 'mean', 'standard dev']) + '\n'

# terminal nodes
prediction.rank_by_method(nodes=prediction.terminals, method='mean_fitness')
with open(dirname + '/sequence_ranking_terminals.txt', 'w') as outfile:
    outfile.write(ranking_header)
    for node in prediction.terminals:
        outfile.write('\t'.join(map(str, [node.name, node.rank,
                                          node.mean_fitness,
                                          np.sqrt(node.var_fitness)])) + '\n')

# non-terminal (internal) nodes
prediction.rank_by_method(nodes=prediction.non_terminals, method='mean_fitness')
with open(dirname + '/sequence_ranking_nonterminals.txt', 'w') as outfile:
    outfile.write(ranking_header)
    for node in prediction.non_terminals:
        outfile.write('\t'.join(map(str, [node.name, node.rank,
                                          node.mean_fitness,
                                          np.sqrt(node.var_fitness)])) + '\n')

# plot the tree if desired
if params.plot:
    tree_utils.plot_prediction_tree(prediction)
    plt.savefig(dirname + '/marked_up_tree.pdf')
# Save the reconstructed tree in newick format.
Phylo.write(prediction.T, dirname + '/reconstructed_tree.nwk', 'newick')

# Save the inferred sequence of every internal node as FASTA.
with open(dirname + '/ancestral_sequences.fasta', 'w') as fasta_out:
    fasta_out.writelines(
        '>' + internal.name + '\n' + str(internal.seq) + '\n'
        for internal in prediction.non_terminals)

## Sequence rankings: one tab-separated table for the leaves and one for
## the internal nodes, each ranked by the 'polarizer' method (the column
## is labelled 'LBI' in the output).
ranking_header = '#' + '\t'.join(['name', 'rank', 'LBI']) + '\n'

# terminal nodes
prediction.rank_by_method(nodes=prediction.terminals, method='polarizer')
with open(dirname + '/sequence_ranking_terminals.txt', 'w') as ranking_out:
    ranking_out.write(ranking_header)
    for leaf in prediction.terminals:
        columns = [leaf.name, leaf.rank, leaf.polarizer]
        ranking_out.write('\t'.join(str(col) for col in columns) + '\n')

# non-terminal (internal) nodes
prediction.rank_by_method(nodes=prediction.non_terminals, method='polarizer')
with open(dirname + '/sequence_ranking_nonterminals.txt', 'w') as ranking_out:
    ranking_out.write(ranking_header)
    for internal in prediction.non_terminals:
        columns = [internal.name, internal.rank, internal.polarizer]
        ranking_out.write('\t'.join(str(col) for col in columns) + '\n')

# Optionally draw the tree coloured by the polarizer (internal nodes
# included) and save it next to the other outputs.
if params.plot:
    tree_utils.plot_prediction_tree(prediction, method='polarizer',
                                    internal=True)
    plt.savefig(dirname + '/marked_up_tree.pdf')
node.name = str(ni+1) # write tree to file Phylo.write(prediction.T, dirname+'/reconstructed_tree.nwk', 'newick') # write inferred ancestral sequences to file with open(dirname+'/ancestral_sequences.fasta', 'w') as outfile: for node in prediction.non_terminals: outfile.write('>'+node.name+'\n'+str(node.seq)+'\n') ## write sequence ranking to file # terminal nodes prediction.rank_by_method(nodes = prediction.terminals, method = 'polarizer'); with open(dirname+'/sequence_ranking_terminals.txt', 'w') as outfile: outfile.write('#'+'\t'.join(['name','rank', 'LBI'])+'\n') for node in prediction.terminals: outfile.write('\t'.join(map(str,[node.name, node.rank, node.polarizer]))+'\n') # terminal nodes prediction.rank_by_method(nodes = prediction.non_terminals, method = 'polarizer'); with open(dirname+'/sequence_ranking_nonterminals.txt', 'w') as outfile: outfile.write('#'+'\t'.join(['name','rank', 'LBI'])+'\n') for node in prediction.non_terminals: outfile.write('\t'.join(map(str,[node.name, node.rank, node.polarizer]))+'\n') # plot the tree if desired if params.plot: tree_utils.plot_prediction_tree(prediction, method='polarizer', internal=True) plt.savefig(dirname+'/marked_up_tree.pdf')
method = methods[0] color_internal = True if params.flutype.startswith('H3N2') and year in laessig_prediction: seq_labels = { prediction.best_node(method=method[0], nodes=method[2]).name: '*', laessig_prediction[year].name: "L&L" } else: seq_labels = {prediction.best_node().name: '*'} tree_utils.label_nodes(prediction.T, seq_labels) tree_utils.erase_color(prediction.T) tree_utils.erase_color(combined_data.T) tree_utils.plot_prediction_tree(prediction, internal=color_internal) plt.title( "predicting " + params.flutype + " season " + str(year) + "/" + str(year + 1) + ": " + str( np.round(distances[method[0] + method[1]] / distances['average'], 4))) #plt.savefig('../figures/'+base_name+'_prediction_'+method[0]+method[1]+'_'+name_mod+'.pdf') # plot a combined figure # color according to season pred_names = [c.name for c in prediction.terminals] tree_utils.erase_color(combined_data.T) for c in combined_data.T.get_terminals(): if c.name in pred_names: c.color = (178, 34, 34) else:
# Build the combined data set (prediction data + test data, no additional
# sequences) and mark the top-ranked node with '*' in both trees.
# The "L&L" label from laessig_prediction[year] is not added here.
otherseqsnames = []
combined_data = test_flu.make_combined_data(prediction, test_data,
                                            otherseqsnames)
top_node_name = prediction.best_node().name
seq_labels = {top_node_name: '*'}
tree_utils.label_nodes(prediction.T, seq_labels)
tree_utils.label_nodes(combined_data.T, seq_labels)

# Two-panel figure.
fig = plt.figure(figsize=(12, 6))

# Panel A: only the prediction data.
ax = plt.subplot(121)
plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36)
plt.title('until Feb ' + str(year))
plt.tight_layout()
tree_utils.plot_prediction_tree(prediction, axes=ax, cb=True,
                                offset=0.0005, internal=False)

# Panel B: prediction data together with the test data.
ax = plt.subplot(122)
plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36)
season_label = str(year) + "/" + str(year + 1)
plt.title('until Feb ' + str(year) + ' + season ' + season_label + " (grey)")

pred_names = [leaf.name for leaf in prediction.terminals]
tree_utils.erase_color(combined_data.T)
prediction.color_other_tree(combined_data.T.get_terminals(), offset=0.0005)

# Leaves that are part of the prediction set are left untouched, i.e. they
# keep whatever colour they have after color_other_tree; every other leaf
# is set to (0, 255, 255).
# NOTE(review): the panel title says "(grey)" while this RGB triple is
# cyan -- confirm how the colour is interpreted downstream.
for c in combined_data.T.get_terminals():
    if c.name not in pred_names:
        c.color = (0, 255, 255)
# set up the filtering criteria and select sequences from the master
# alignment: one criterion per region, each covering May 1st of the
# previous year through Feb 28th of `year`
criteria = [(date(year - 1, 5, 1), date(year, 2, 28), [region], sample_size)
            for region in prediction_regions]
my_flu_alignment = flu_alignment(aln_fname, outgroup, annotation,
                                 criteria=criteria,
                                 cds={'begin': 0, 'end': 987, 'pad': 0})

# run the prediction
prediction = flu_ranking(my_flu_alignment, boost=0.5)
top_seq = prediction.predict()
# Call form of print: same output on Python 2 for a single argument, and
# valid syntax on Python 3.
print(top_seq)

# plot the tree colored by the prediction
tree_utils.plot_prediction_tree(prediction)

# plot the distribution of sampling dates
y, x, bin_names = my_flu_alignment.sampling_distribution(bins=10)
import matplotlib.pyplot as plt
plt.figure()
plt.plot(x[1:], y)
# Build the tick labels as a real list; on Python 3 `map` would hand
# matplotlib a lazy iterator instead of a sequence of labels.
plt.xticks(x[1:], [str(b) for b in bin_names], rotation=30)
prediction.best_node('polarizer').name: '*' } #, laessig_prediction[year].name:"L&L"} tree_utils.label_nodes(prediction.T, seq_labels) tree_utils.label_nodes(combined_data.T, seq_labels) # plot a combined figure fig = plt.figure(figsize=(12, 6)) #subplot 1: only the prediction data ax = plt.subplot(121) #add panel label plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36) plt.title('until Feb ' + str(year)) plt.tight_layout() tree_utils.plot_prediction_tree(prediction, method='polarizer', axes=ax, cb=True, offset=0.0005, internal=False) #subplot 2: prediction data and test data ax = plt.subplot(122) #add panel label plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36) plt.title('until Feb ' + str(year) + ' + season ' + str(year) + "/" + str(year + 1) + " (grey)") pred_names = [c.name for c in prediction.terminals] tree_utils.erase_color(combined_data.T) prediction.color_other_tree(combined_data.T.get_terminals(), method='polarizer', offset=0.0005) for c in combined_data.T.get_terminals():
# Rank the sequences using the 'mean_fitness' method.
prediction = sequence_ranking.sequence_ranking(
    seq_data,
    eps_branch_length=eps_branch_length,
    pseudo_count=5,
    methods=['mean_fitness'],
    D=diffusion,
    distance_scale=distance_scale,
    samp_frac=samp_frac)
best_node = prediction.predict()

# Write fitness tree to file (for mutation annotation)
Phylo.write(prediction.T, outfile_fitness_tree, "newick")

# Get fitness changes on each branch, largest increase first
# (presumably a pandas DataFrame, given sort_values/to_csv -- confirm
# against get_fitness_changes)
df_fitness = get_fitness_changes(prediction)
df_fitness.sort_values("delta_mean_fitness", ascending=False, inplace=True)

# Write fitness changes to file
df_fitness.to_csv(outfile_df_fitness)

# Plot tree colored by fitness with and without node labels
tree_utils.plot_prediction_tree(prediction)
plt.savefig(outfile_tree_pdf)
tree_utils.plot_prediction_tree(prediction, node_label_func=lambda x: x.name)
plt.savefig(outfile_tree_pdf_labeled)

# Call form of print: identical output on Python 2, valid on Python 3.
print("Done!!")
prediction.best_node().name: '*' } #, laessig_prediction[year].name:"L&L"} tree_utils.label_nodes(prediction.T, seq_labels) tree_utils.label_nodes(combined_data.T, seq_labels) # plot a combined figure fig = plt.figure(figsize=(12, 6)) #subplot 1: only the prediction data ax = plt.subplot(121) #add panel label plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36) plt.title('until Feb ' + str(year)) plt.tight_layout() tree_utils.plot_prediction_tree(prediction, axes=ax, cb=True, offset=0.0005, internal=False) #subplot 2: prediction data and test data ax = plt.subplot(122) #add panel label plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36) plt.title('until Feb ' + str(year) + ' + season ' + str(year) + "/" + str(year + 1) + " (grey)") pred_names = [c.name for c in prediction.terminals] tree_utils.erase_color(combined_data.T) prediction.color_other_tree(combined_data.T.get_terminals(), offset=0.0005) for c in combined_data.T.get_terminals(): if c.name in pred_names: pass
# Merge the test data, the prediction data and any additional sequences
# into one data set (with its own tree).
combined_data = test_flu.make_combined_data(prediction, test_data,
                                            otherseqsnames)
method = methods[0]
color_internal = True

# The top-ranked node is labelled '*'. For H3N2 in years covered by
# laessig_prediction, the top node is selected with the first method's
# settings and the laessig_prediction entry is labelled "L&L" as well.
use_laessig = params.flutype.startswith('H3N2') and year in laessig_prediction
if not use_laessig:
    seq_labels = {prediction.best_node().name: '*'}
else:
    top_node = prediction.best_node(method=method[0], nodes=method[2])
    seq_labels = {top_node.name: '*',
                  laessig_prediction[year].name: "L&L"}
tree_utils.label_nodes(prediction.T, seq_labels)

# Start from uncoloured trees, then draw the prediction tree.
tree_utils.erase_color(prediction.T)
tree_utils.erase_color(combined_data.T)
tree_utils.plot_prediction_tree(prediction, internal=color_internal)

# The title reports the selected method's distance relative to
# distances['average'], rounded to four decimals.
score_key = method[0] + method[1]
relative_distance = np.round(distances[score_key] / distances['average'], 4)
plt.title("predicting " + params.flutype + " season " + str(year) + "/"
          + str(year + 1) + ": " + str(relative_distance))
# Saving this figure to ../figures/ is currently disabled.

# plot a combined figure
# color according to season: leaves that belong to the prediction set get
# (178, 34, 34), all remaining leaves get (0, 255, 255)
pred_names = [leaf.name for leaf in prediction.terminals]
tree_utils.erase_color(combined_data.T)
for c in combined_data.T.get_terminals():
    c.color = (178, 34, 34) if c.name in pred_names else (0, 255, 255)
prediction.interpolate_color(combined_data.T)
# Build the combined data set (prediction data + test data, no additional
# sequences) and mark the node ranked best by the 'polarizer' method with
# '*' in both trees.
# The "L&L" label from laessig_prediction[year] is not added here.
otherseqsnames = []
combined_data = test_flu.make_combined_data(prediction, test_data,
                                            otherseqsnames)
top_node_name = prediction.best_node('polarizer').name
seq_labels = {top_node_name: '*'}
tree_utils.label_nodes(prediction.T, seq_labels)
tree_utils.label_nodes(combined_data.T, seq_labels)

# Two-panel figure.
fig = plt.figure(figsize=(12, 6))

# Panel A: only the prediction data.
ax = plt.subplot(121)
plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36)
plt.title('until Feb ' + str(year))
plt.tight_layout()
tree_utils.plot_prediction_tree(prediction, method='polarizer', axes=ax,
                                cb=True, offset=0.0005, internal=False)

# Panel B: prediction data together with the test data.
ax = plt.subplot(122)
plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36)
season_label = str(year) + "/" + str(year + 1)
plt.title('until Feb ' + str(year) + ' + season ' + season_label + " (grey)")

pred_names = [leaf.name for leaf in prediction.terminals]
tree_utils.erase_color(combined_data.T)
prediction.color_other_tree(combined_data.T.get_terminals(),
                            method='polarizer', offset=0.0005)

# Leaves that are part of the prediction set are left untouched, i.e. they
# keep whatever colour they have after color_other_tree; every other leaf
# is set to (0, 255, 255).
# NOTE(review): the panel title says "(grey)" while this RGB triple is
# cyan -- confirm how the colour is interpreted downstream.
for c in combined_data.T.get_terminals():
    if c.name not in pred_names:
        c.color = (0, 255, 255)