Exemplo n.º 1
0
def optParse():
	"""Parse the command line options and validate them.

	Each failed check terminates the program through sys.exit(), passing it
	whatever gc.errorOut() returns for the given error number and message.

	Returns:
		A tuple (input, paml_path, tree_file, prune_opt, verbosity, output).
		input and paml_path are converted to absolute paths; the remaining
		values are returned as parsed.
	"""
	parser = argparse.ArgumentParser(description="Runs codeml on a directory full of .fa files. Files MUST have .fa extension. Dependencies: PAML, newickutils (if you want to prune your tree with --prune)")

	parser.add_argument("-i", dest="input", help="Input. A directory containing many FASTA (.fa) files.")
	parser.add_argument("-p", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
	parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
	parser.add_argument("--prune", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this flag to prune the tree for each file.", action="store_true")
	# parser.add_argument("-seqtype", dest="paml_seqtype", help="Enter either 'codon' or 'aa'. Default value is 'codon'.", default='codon');
	parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
	parser.add_argument("-o", dest="output", help="Desired output directory. If none is entered, will be determined automatically.", default=False)

	args = parser.parse_args()

	# Both the input directory and the PAML directory are mandatory.
	if args.input is None or args.paml_path is None:
		sys.exit(gc.errorOut(1, "Both -i and -p must be set."))
	if not os.path.isdir(args.input) or not os.path.isdir(args.paml_path):
		sys.exit(gc.errorOut(2, "Both -i and -p must be valid directory paths!"))
	args.input = os.path.abspath(args.input)
	args.paml_path = os.path.abspath(args.paml_path)

	# NOTE(review): -t defaults to "", which fails this check, so a tree file
	# is effectively required even though the -t help text says it is
	# optional. Confirm which of the two is intended.
	if not os.path.isfile(args.tree_file):
		sys.exit(gc.errorOut(3, "-t must be a valid file name."))

	# The parsed tree is discarded; the call is only used to verify that the
	# file can be read and parsed. "except Exception" keeps Ctrl-C and
	# SystemExit from being reported as an invalid Newick string.
	try:
		with open(args.tree_file, "r") as tree_handle:
			tree_string = tree_handle.read().replace("\n", "")
		_td, _tree, _root = gt.treeParse(tree_string)
	except Exception:
		sys.exit(gc.errorOut(4, "-t does not contain a valid Newick string!"))

	if args.verbosity not in [0, 1]:
		sys.exit(gc.errorOut(6, "-v must take values of either 1 or 0"))

	return args.input, args.paml_path, args.tree_file, args.prune_opt, args.verbosity, args.output
Exemplo n.º 2
0
Arquivo: gwct.py Projeto: gwct/gwct
def splitThreads(arglist):
	"""Run the convergence check on every FASTA file in a list.

	Args:
		arglist: A sequence laid out as
			[0] the file names to process,
			[1] the target specification, or "" to compare every pair
			    returned by getTargs(),
			[2] u, a mode flag: when it is not 1 the files are expected to be
			    named <gid>_ancprobs.fa and a matching <gid>_anc.tre is read,
			[3] p, passed through to getTargs(),
			[4] threads (not used inside this function).

	Returns:
		The results dictionary. Each key is created with the value
		[[], [], []] and the dictionary is then replaced by whatever
		convergence.convCheck() returns.

	Relies on the module-level names indir and prob_thresh.
	"""
	filelist_func = arglist[0]
	orig_targets = arglist[1]
	u = arglist[2]
	p = arglist[3]
	threads = arglist[4]  # Not used below; kept so a short arglist still fails here.
	results_dict = {}

	for filename in filelist_func:
		if ".fa" not in filename:
			continue
		print(filename)

		if u != 1:
			gid = filename[:filename.index("_ancprobs.fa")]
		else:
			gid = filename[:filename.index(".fa")]
		gene = "_".join(gid.split("_")[:2])
		# Four characters starting at the first "chr" in the gene id.
		chr_start = gid.find("chr")
		chromosome = gid[chr_start:chr_start + 4]
		infilename = os.path.join(indir, filename)
		if u != 1:
			treefilename = os.path.join(indir, gid + "_anc.tre")
			with open(treefilename, "r") as tree_handle:
				tree = tree_handle.read().replace("\n", "")
			tree_dict, _new_tree = gwctree.treeParse(tree)
		# NOTE(review): when u == 1 tree_dict is never assigned in this
		# function, so the calls below would raise NameError in that mode.
		# Confirm how that mode is meant to obtain a tree.

		if orig_targets != "":
			results_key = str(orig_targets)
			if results_key not in results_dict:
				results_dict[results_key] = [[], [], []]
			# Resets the targets for each gene.
			targets = copy.deepcopy(orig_targets)

			# Checking for convergent sites.
			results_dict = convergence.convCheck(infilename, results_dict, results_key, targets, prob_thresh, chromosome, gene, tree_dict, u)

		else:
			target_nodes = getTargs(tree_dict, p)
			for targets in target_nodes:
				# If one node is the ancestor of the other, skip this comparison.
				if tree_dict[targets[0]][1] == targets[1] or tree_dict[targets[1]][1] == targets[0]:
					continue

				# Build the key as "<label of first node>-<label of second node>".
				node_key = ""
				for n in targets:
					if "_" in n:
						node_key += n[n.index("_") + 1:]
					else:
						node_key += tree_dict[n][3]
					if n == targets[0]:
						node_key += "-"

				if node_key not in results_dict:
					results_dict[node_key] = [[], [], []]

				results_dict = convergence.convCheck(infilename, results_dict, node_key, targets, prob_thresh, chromosome, gene, tree_dict, u)

	return results_dict
Exemplo n.º 3
0
gwctcore.logCheck(l, logfilename, "-------------------------------------")

# Create the output directories with os.makedirs instead of a shell "mkdir"
# string, so paths containing spaces or shell metacharacters work and a
# failure to create the directory raises instead of passing silently.
if not os.path.exists(outdir):
	gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Creating codeml output directory:\t" + outdir)
	os.makedirs(outdir)

if aopt == 1 and not os.path.exists(ancdir):
	gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Creating directory to pass ancestral sequences and trees:\t" + ancdir)
	os.makedirs(ancdir)

# When pruning, read the tree and collect the nodes marked as tips.
if prune == 1:
	gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Retrieving tree info...")
	with open(treefile, "r") as tree_handle:
		tree_string = tree_handle.read().replace("\n", "")
	td, tree = gwctree.treeParse(tree_string, 0)

	tips = [node for node in td if td[node][2] == 'tip']

gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Starting codeml runs...\n")
# The codeml log file path is only set in silent mode (v == 0).
if v == 0:
	codeml_logfile = os.path.join(script_outdir, "codeml.log")

ctlfilename = "codeml.ctl"

# Counters for the runs that follow.
i = 0
numbars = 0
donepercent = []
Exemplo n.º 4
0
Arquivo: gwct.py Projeto: gwct/gwct
def splitThreads(arglist):
    """Run the convergence check on every ancestral-probability file in a list.

    Args:
        arglist: A sequence laid out as
            [0] the file names to process (each must contain
                "-ancprobs.fa" or "_ancprobs.fa"),
            [1] the target specification, or "" to compare every pair
                returned by getTargs(),
            [2] p, passed through to getTargs(),
            [3] threads (not used inside this function).

    Returns:
        The results dictionary. Each key is created with the value
        [[], [], []] and the dictionary is then replaced by whatever
        convergence.convCheck() returns.

    Raises:
        ValueError: If a file name contains neither naming suffix.

    Relies on the module-level names ancdir, prob_thresh and pairwise.
    """
    filelist_func = arglist[0]
    orig_targets = arglist[1]
    p = arglist[2]
    threads = arglist[3]  # Not used below; kept so a short arglist still fails here.
    results_dict = {}

    for filename in filelist_func:
        print(filename)

        # Files use either "-" or "_" before "ancprobs.fa"; the tree file
        # next to them uses the same separator.
        sep = "-" if "-ancprobs.fa" in filename else "_"
        gid = filename[:filename.index(sep + "ancprobs.fa")]
        treefilename = os.path.join(ancdir, gid + sep + "anc.tre")
        infilename = os.path.join(ancdir, filename)

        with open(treefilename, "r") as tree_handle:
            tree = tree_handle.read().strip()
        tree_dict, _new_tree, _root = gt.treeParse(tree)

        if orig_targets != "":
            results_key = str(orig_targets)
            if results_key not in results_dict:
                results_dict[results_key] = [[], [], []]
            # Resets the targets for each gene.
            targets = copy.deepcopy(orig_targets)

            # Checking for convergent sites.
            results_dict = convergence.convCheck(infilename, results_dict,
                                                 results_key, targets,
                                                 prob_thresh, gid, tree_dict,
                                                 pairwise)

        else:
            target_nodes = getTargs(tree_dict, p)
            for targets in target_nodes:
                # If one node is the ancestor of the other, skip this comparison.
                if (tree_dict[targets[0]][1] == targets[1]
                        or tree_dict[targets[1]][1] == targets[0]):
                    continue

                # Build the key as "<label of first node>-<label of second node>".
                node_key = ""
                for n in targets:
                    if "_" in n:
                        node_key += n[n.index("_") + 1:]
                    else:
                        node_key += tree_dict[n][3]
                    if n == targets[0]:
                        node_key += "-"

                if node_key not in results_dict:
                    results_dict[node_key] = [[], [], []]

                # Each node is wrapped in its own single-element list before
                # being handed to convCheck; both forms are printed.
                print(targets)
                wrapped_targets = [[t] for t in targets]
                print(wrapped_targets)

                results_dict = convergence.convCheck(infilename, results_dict,
                                                     node_key, wrapped_targets,
                                                     prob_thresh, gid,
                                                     tree_dict, pairwise)

    return results_dict
Exemplo n.º 5
0
	gc.printWrite(logfilename, " -> Printing all codeml output to the screen (-v 1)");
else:
	gc.printWrite(logfilename, " -> Silent mode. Not printing codeml output to the screen (-v 0)");
gc.printWrite(logfilename, "-------------------------------------")
# Print IO info to screen for user.

# The input directory is listed before any output directory is created.
filelist = os.listdir(indir)
# Create output directories. os.makedirs replaces the shell "mkdir" string so
# paths with spaces or shell metacharacters work; the isdir guard keeps an
# already existing directory from being an error.
print("+ Creating codeml output directory:\t" + codemldir)
if not os.path.isdir(codemldir):
	os.makedirs(codemldir)
print("+ Creating directory to pass ancestral sequences and trees:\t" + ancdir)
if not os.path.isdir(ancdir):
	os.makedirs(ancdir)

# Read tree info for pruning: collect the nodes marked as tips.
if prune:
	print(" -> Retrieving tree info...")
	with open(treefile, "r") as tree_handle:
		tree_string = tree_handle.read().replace("\n", "")
	td, tree, r = gt.treeParse(tree_string, 0)
	tips = [node for node in td if td[node][2] == 'tip']

gc.printWrite(logfilename, gc.getTime() + " | Starting codeml runs...\n")
# The codeml stdout file path is only set in silent mode (v == 0).
if v == 0:
	codeml_logfile = os.path.join(outdir, "codeml.stdout")
ctlfilename = "codeml.ctl"

# Loading bar stuff
i, numbars, donepercent, numfiles = 0, 0, [], len(filelist)

fa_skip = []
for cur_file in filelist:
	if v == 0:
		numbars, donepercent = gc.loadingBar(i, numfiles, donepercent, numbars);
Exemplo n.º 6
0
Arquivo: gwct.py Projeto: gwct/gwct
def splitThreads(arglist):
    """Run the convergence check on every FASTA file in a list.

    Args:
        arglist: A sequence laid out as
            [0] the file names to process,
            [1] the target specification, or "" to compare every pair
                returned by getTargs(),
            [2] u, a mode flag: when it is not 1 the files are expected to be
                named <gid>_ancprobs.fa and a matching <gid>_anc.tre is read,
            [3] p, passed through to getTargs(),
            [4] threads (not used inside this function).

    Returns:
        The results dictionary. Each key is created with the value
        [[], [], []] and the dictionary is then replaced by whatever
        convergence.convCheck() returns.

    Relies on the module-level names indir and prob_thresh.
    """
    filelist_func = arglist[0]
    orig_targets = arglist[1]
    u = arglist[2]
    p = arglist[3]
    threads = arglist[4]  # Not used below; kept so a short arglist still fails here.
    results_dict = {}

    for filename in filelist_func:
        if ".fa" not in filename:
            continue
        print(filename)

        if u != 1:
            gid = filename[:filename.index("_ancprobs.fa")]
        else:
            gid = filename[:filename.index(".fa")]
        gene = "_".join(gid.split("_")[:2])
        # Four characters starting at the first "chr" in the gene id.
        chr_start = gid.find("chr")
        chromosome = gid[chr_start:chr_start + 4]
        infilename = os.path.join(indir, filename)
        if u != 1:
            treefilename = os.path.join(indir, gid + "_anc.tre")
            with open(treefilename, "r") as tree_handle:
                tree = tree_handle.read().replace("\n", "")
            tree_dict, _new_tree = gwctree.treeParse(tree)
        # NOTE(review): when u == 1 tree_dict is never assigned in this
        # function, so the calls below would raise NameError in that mode.
        # Confirm how that mode is meant to obtain a tree.

        if orig_targets != "":
            results_key = str(orig_targets)
            if results_key not in results_dict:
                results_dict[results_key] = [[], [], []]
            # Resets the targets for each gene.
            targets = copy.deepcopy(orig_targets)

            # Checking for convergent sites.
            results_dict = convergence.convCheck(infilename, results_dict,
                                                 results_key, targets,
                                                 prob_thresh, chromosome, gene,
                                                 tree_dict, u)

        else:
            target_nodes = getTargs(tree_dict, p)
            for targets in target_nodes:
                # If one node is the ancestor of the other, skip this comparison.
                if (tree_dict[targets[0]][1] == targets[1]
                        or tree_dict[targets[1]][1] == targets[0]):
                    continue

                # Build the key as "<label of first node>-<label of second node>".
                node_key = ""
                for n in targets:
                    if "_" in n:
                        node_key += n[n.index("_") + 1:]
                    else:
                        node_key += tree_dict[n][3]
                    if n == targets[0]:
                        node_key += "-"

                if node_key not in results_dict:
                    results_dict[node_key] = [[], [], []]

                results_dict = convergence.convCheck(infilename, results_dict,
                                                     node_key, targets,
                                                     prob_thresh, chromosome,
                                                     gene, tree_dict, u)

    return results_dict