Exemplo n.º 1
0
def write_tree_rates(rate_dict, alignment_dict, cdna_dicts, out_filename,
                     xfold_only, xfold_degeneracy):
    """Write one tab-separated row of rate statistics per ORF.

    A header line is always written.  Then, for every ORF in ``rate_dict``,
    the matching alignment is looked up in ``alignment_dict`` and a row is
    emitted unless the ORF is skipped:

    * ORFs whose ``mfal`` value (``align_stats`` over ``frac_aligned`` with
      ``min``) is below 0.5 are reported on stdout and skipped.
    * ORFs for which ``get_rate_correlation_windowed`` raises
      ``stats.StatsError`` are skipped silently.

    Args:
        rate_dict: maps ORF -> ``(dns, dss, tree)``; ``tree`` is handed to
            ``phylip.parse_tree``.
        alignment_dict: maps ORF -> ``(length, spec_orf_list,
            aligned_prots)``.
        cdna_dicts: forwarded unchanged to ``get_rate_correlation_windowed``.
        out_filename: path of the output file; it is created or truncated.
        xfold_only: forwarded unchanged to ``get_rate_correlation_windowed``.
        xfold_degeneracy: forwarded unchanged to
            ``get_rate_correlation_windowed``.

    Raises:
        KeyError: if an ORF of ``rate_dict`` is missing from
            ``alignment_dict``.
    """
    # The context manager guarantees the file is closed on every exit path,
    # including exceptions raised by the helpers called in the loop.
    with open(out_filename, 'w') as outfile:
        outfile.write("orf\tnal\tcor\tpcor\tncor\tns\tss\tmsid\tmfal\tadn\n")
        for orf in rate_dict:
            (_length, spec_orf_list, aligned_prots) = alignment_dict[orf]
            (dns, dss, tree) = rate_dict[orf]
            # "nal" column: number of leaves in the parsed tree.
            num_genes = len(phylip.parse_tree(tree).leaves())
            msid = align_stats(aligned_prots, sequence_identity, stats.Mean)
            mfal = align_stats(aligned_prots, frac_aligned, min)
            if mfal < 0.5:
                # Single pre-formatted argument: prints identically under
                # the Python 2 print statement and the Python 3 function.
                print("# rejected orf (mfal,msid) %s (%1.2f,%1.2f)" %
                      (orf, mfal, msid))
                continue

            # Compute correlation between nonsyn-syn changes.  Only this
            # call is guarded, so unrelated errors are not masked.
            try:
                ((r, p, n), ns,
                 ss) = get_rate_correlation_windowed(dns, dss, aligned_prots,
                                                     cdna_dicts, spec_orf_list,
                                                     xfold_only,
                                                     xfold_degeneracy)
            except stats.StatsError:
                # Deliberate best-effort: ORFs without a usable correlation
                # are left out of the table.
                continue

            outfile.write(
                "%s\t%d\t%1.3f\t%1.3f\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n" %
                (orf, num_genes, r, p, n, sum(ns), sum(ss), msid, mfal,
                 sum(ns) / float(len(dns))))
            # Flush per row so partial results are visible during long runs.
            outfile.flush()
Exemplo n.º 2
0
def write_tree_rates(rate_dict, alignment_dict, cdna_dicts, out_filename, xfold_only, xfold_degeneracy):
	"""Write one tab-separated row of rate statistics per ORF.

	A header line is always written.  Then, for every ORF in ``rate_dict``,
	the matching alignment is looked up in ``alignment_dict`` and a row is
	emitted unless the ORF is skipped:

	* ORFs whose ``mfal`` value (``align_stats`` over ``frac_aligned`` with
	  ``min``) is below 0.5 are reported on stdout and skipped.
	* ORFs for which ``get_rate_correlation_windowed`` raises
	  ``stats.StatsError`` are skipped silently.

	Args:
		rate_dict: maps ORF -> ``(dns, dss, tree)``; ``tree`` is handed to
			``phylip.parse_tree``.
		alignment_dict: maps ORF -> ``(length, spec_orf_list, aligned_prots)``.
		cdna_dicts: forwarded unchanged to ``get_rate_correlation_windowed``.
		out_filename: path of the output file; it is created or truncated.
		xfold_only: forwarded unchanged to ``get_rate_correlation_windowed``.
		xfold_degeneracy: forwarded unchanged to ``get_rate_correlation_windowed``.

	Raises:
		KeyError: if an ORF of ``rate_dict`` is missing from ``alignment_dict``.
	"""
	# The context manager guarantees the file is closed on every exit path,
	# including exceptions raised by the helpers called in the loop.
	with open(out_filename, 'w') as outfile:
		outfile.write("orf\tnal\tcor\tpcor\tncor\tns\tss\tmsid\tmfal\tadn\n")
		for orf in rate_dict:
			(_length, spec_orf_list, aligned_prots) = alignment_dict[orf]
			(dns, dss, tree) = rate_dict[orf]
			# "nal" column: number of leaves in the parsed tree.
			num_genes = len(phylip.parse_tree(tree).leaves())
			msid = align_stats(aligned_prots, sequence_identity, stats.Mean)
			mfal = align_stats(aligned_prots, frac_aligned, min)
			if mfal < 0.5:
				# Single pre-formatted argument: prints identically under the
				# Python 2 print statement and the Python 3 function.
				print("# rejected orf (mfal,msid) %s (%1.2f,%1.2f)" % (orf, mfal, msid))
				continue

			# Compute correlation between nonsyn-syn changes.  Only this call
			# is guarded, so unrelated errors are not masked.
			try:
				((r, p, n), ns, ss) = get_rate_correlation_windowed(dns, dss, aligned_prots, cdna_dicts, spec_orf_list, xfold_only, xfold_degeneracy)
			except stats.StatsError:
				# Deliberate best-effort: ORFs without a usable correlation
				# are left out of the table.
				continue

			outfile.write("%s\t%d\t%1.3f\t%1.3f\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n" % \
				  (orf, num_genes, r, p, n, sum(ns), sum(ss), msid, mfal, sum(ns) / float(len(dns))))
			# Flush per row so partial results are visible during long runs.
			outfile.flush()
Exemplo n.º 3
0
def get_tree_rates(ortho_dict, alignment_dict, cdna_dicts, tree_string, begin_index, end_index):
	"""Select the genes whose alignments cover every species of the tree.

	NOTE(review): only the gene-selection stage is visible in this snippet.
	``cdna_dicts``, ``begin_index``, ``end_index``, ``n_genes`` and
	``rate_ancestor_cache`` are not used here and nothing is returned, so
	the function presumably continues beyond what is shown -- confirm
	against the full source.

	Args:
		ortho_dict: mapping whose keys are the candidate genes.
		alignment_dict: maps gene -> ``(length, corr_keys, aligned_prots)``
			where ``corr_keys`` is a sequence of ``(species, orf)`` pairs.
		cdna_dicts: not used in the visible portion.
		tree_string: tree description passed to ``phylip.parse_tree``.
		begin_index: not used in the visible portion.
		end_index: not used in the visible portion.
	"""
	tree = phylip.parse_tree(tree_string)
	# Names of the leaf nodes of the parsed tree.
	tree_species = [n.name for n in tree.leaves()]
	
	n_genes = 0

	# Individual genes
	rate_ancestor_cache = {}

	# Assemble gene list
	keys = []
	for gene in ortho_dict.keys():
		try:
			(length, corr_keys, aligned_prots) = alignment_dict[gene]
			# Keep only the species half of each (species, orf) pair.
			alignment_species = [spec for (spec,orf) in corr_keys]
		except KeyError, ke:
			# Gene has no entry in alignment_dict: report it and move on.
			print "# Couldn't find alignments for", ke
			continue

		# Must have concordance between tree species and alignment species
		# (the intersection equals the tree set exactly when every tree
		# species also occurs among the alignment species).
		if set(tree_species).intersection(set(alignment_species)) == set(tree_species):
			keys.append(gene)
Exemplo n.º 4
0
def get_tree_rates(ortho_dict, alignment_dict, cdna_dicts, tree_string,
                   begin_index, end_index):
    """Select the genes whose alignments cover every species of the tree.

    NOTE(review): only the gene-selection stage is visible in this snippet.
    ``cdna_dicts``, ``begin_index``, ``end_index``, ``n_genes`` and
    ``rate_ancestor_cache`` are not used here and nothing is returned, so
    the function presumably continues beyond what is shown -- confirm
    against the full source.

    Args:
        ortho_dict: mapping whose keys are the candidate genes.
        alignment_dict: maps gene -> ``(length, corr_keys, aligned_prots)``
            where ``corr_keys`` is a sequence of ``(species, orf)`` pairs.
        cdna_dicts: not used in the visible portion.
        tree_string: tree description passed to ``phylip.parse_tree``.
        begin_index: not used in the visible portion.
        end_index: not used in the visible portion.
    """
    tree = phylip.parse_tree(tree_string)
    # Names of the leaf nodes of the parsed tree.
    tree_species = [n.name for n in tree.leaves()]

    n_genes = 0

    # Individual genes
    rate_ancestor_cache = {}

    # Assemble gene list
    keys = []
    for gene in ortho_dict.keys():
        try:
            (length, corr_keys, aligned_prots) = alignment_dict[gene]
            # Keep only the species half of each (species, orf) pair.
            alignment_species = [spec for (spec, orf) in corr_keys]
        except KeyError, ke:
            # Gene has no entry in alignment_dict: report it and move on.
            print "# Couldn't find alignments for", ke
            continue

        # Must have concordance between tree species and alignment species
        # (the intersection equals the tree set exactly when every tree
        # species also occurs among the alignment species).
        if set(tree_species).intersection(
                set(alignment_species)) == set(tree_species):
            keys.append(gene)