def optParse():
    """Parse and validate the command line options for the codeml runner.

    Returns:
        A 6-tuple ``(input, paml_path, tree_file, prune_opt, verbosity, output)``.
        ``input`` and ``paml_path`` are converted to absolute paths; the other
        values are returned exactly as argparse produced them.

    Exits:
        Every validation failure ends the program through
        ``sys.exit(gc.errorOut(code, message))`` with these codes:
        1 -- ``-i`` or ``-p`` missing; 2 -- ``-i`` or ``-p`` is not a directory;
        3 -- ``-t`` is not an existing file; 4 -- the tree file could not be read
        or parsed by ``gt.treeParse``; 6 -- ``-v`` is not 0 or 1.
    """
    parser = argparse.ArgumentParser(description="Runs codeml on a directory full of .fa files. Files MUST have .fa extension. Dependencies: PAML, newickutils (if you want to prune your tree with --prune)")
    parser.add_argument("-i", dest="input", help="Input. A directory containing many FASTA (.fa) files.")
    parser.add_argument("-p", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
    parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
    parser.add_argument("--prune", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this flag to prune the tree for each file.", action="store_true")
    # parser.add_argument("-seqtype", dest="paml_seqtype", help="Enter either 'codon' or 'aa'. Default value is 'codon'.", default='codon');
    parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
    parser.add_argument("-o", dest="output", help="Desired output directory. If none is entered, will be determined automatically.", default=False)

    args = parser.parse_args()

    # Both checks cover -i AND -p, so the messages name both options.
    if args.input is None or args.paml_path is None:
        sys.exit(gc.errorOut(1, "Both -i and -p must be set."))
    if not os.path.isdir(args.input) or not os.path.isdir(args.paml_path):
        sys.exit(gc.errorOut(2, "Both -i and -p must be valid directory paths!"))
    args.input = os.path.abspath(args.input)
    args.paml_path = os.path.abspath(args.paml_path)

    # NOTE(review): the -t help text describes the tree as optional, but its
    # default ("") never passes this check, so in practice -t is required.
    # Left as-is because later stages may rely on a tree being present -- confirm.
    if not os.path.isfile(args.tree_file):
        sys.exit(gc.errorOut(3, "-t must be a valid file name."))

    try:
        with open(args.tree_file, "r") as tree_handle:
            newick = tree_handle.read().replace("\n", "")
        # The parse result is discarded; the call and the 3-way unpack only
        # serve to validate the tree string.
        _td, _tree, _root = gt.treeParse(newick)
    except Exception:
        # "except Exception" rather than a bare "except" so that Ctrl-C and
        # sys.exit() raised further down are not reported as a bad tree.
        sys.exit(gc.errorOut(4, "-t does not contain a valid Newick string!"))

    if args.verbosity not in [0, 1]:
        sys.exit(gc.errorOut(6, "-v must take values of either 1 or 0"))

    return args.input, args.paml_path, args.tree_file, args.prune_opt, args.verbosity, args.output
def splitThreads(arglist):
    """Run the convergence check over one batch of alignment files.

    Args:
        arglist: Packed arguments, read by index:
            [0] iterable of file names, resolved inside the module-level ``indir``;
                names that do not contain ".fa" are skipped.
            [1] ``orig_targets`` -- ``""`` to test every pair returned by
                ``getTargs``; any other value is used as the targets for every file.
            [2] ``u`` -- when 1 the gene id is the file name up to ".fa" and no
                tree file is read; otherwise the gene id is the file name up to
                "_ancprobs.fa" and ``<gid>_anc.tre`` is parsed.
            [3] ``p`` -- forwarded to ``getTargs``.
            [4] thread count -- not used by this function and no longer read.

    Returns:
        dict: result key -> accumulator. Each accumulator starts as three empty
        lists and the whole dict is passed through ``convergence.convCheck``.

    Raises:
        ValueError: if ``u != 1`` and a file name lacks "_ancprobs.fa".

    Also reads the module-level names ``indir`` and ``prob_thresh``.
    """
    filelist_func = arglist[0]
    orig_targets = arglist[1]
    u = arglist[2]
    p = arglist[3]

    results_dict = {}
    for filename in filelist_func:
        if ".fa" not in filename:
            continue
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print(filename)

        if u != 1:
            gid = filename[:filename.index("_ancprobs.fa")]
        else:
            gid = filename[:filename.index(".fa")]
        gene = "_".join(gid.split("_")[:2])
        chr_start = gid.find("chr")
        chromosome = gid[chr_start:chr_start + 4]

        infilename = os.path.join(indir, filename)
        if u != 1:
            treefilename = os.path.join(indir, gid + "_anc.tre")
            # Context manager so the tree file is closed deterministically.
            with open(treefilename, "r") as tree_handle:
                tree = tree_handle.read().replace("\n", "")
            tree_dict, _new_tree = gwctree.treeParse(tree)
        # NOTE(review): tree_dict is only bound in the branch above, so with
        # u == 1 the uses below hit an unbound local. Intended behaviour for
        # that mode is not visible here -- confirm before changing.

        if orig_targets != "":
            results_key = str(orig_targets)
            if results_key not in results_dict:
                results_dict[results_key] = [[], [], []]
            # Fresh copy per gene so convCheck cannot alter the caller's targets.
            targets = copy.deepcopy(orig_targets)
            results_dict = convergence.convCheck(infilename, results_dict, results_key, targets, prob_thresh, chromosome, gene, tree_dict, u)
        else:
            target_nodes = getTargs(tree_dict, p)
            for targets in target_nodes:
                # If one node is the ancestor of the other, skip this comparison.
                if tree_dict[targets[0]][1] == targets[1] or tree_dict[targets[1]][1] == targets[0]:
                    continue

                # Key is "<label of first node>-<label of second node>": the part
                # after the first "_" when the name has one, else tree_dict[n][3].
                node_key = ""
                for n in targets:
                    if "_" in n:
                        node_key += n[n.index("_") + 1:]
                    else:
                        node_key += tree_dict[n][3]
                    if n == targets[0]:
                        node_key += "-"

                if node_key not in results_dict:
                    results_dict[node_key] = [[], [], []]
                results_dict = convergence.convCheck(infilename, results_dict, node_key, targets, prob_thresh, chromosome, gene, tree_dict, u)

    return results_dict
# Prepare output directories, optional tree information and progress-bar state
# before the codeml runs start.
gwctcore.logCheck(l, logfilename, "-------------------------------------")

# os.makedirs replaces the former shell call ("mkdir " + path): it does not go
# through a shell, so paths containing spaces or shell metacharacters are safe,
# and a failure raises OSError instead of being silently ignored.
if not os.path.exists(outdir):
    gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Creating codeml output directory:\t" + outdir)
    os.makedirs(outdir)

# The ancestral-sequence directory is only needed when aopt is 1.
if aopt == 1 and not os.path.exists(ancdir):
    gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Creating directory to pass ancestral sequences and trees:\t" + ancdir)
    os.makedirs(ancdir)

if prune == 1:
    gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Retrieving tree info...")
    # Context manager so the tree file handle is closed after reading.
    with open(treefile, "r") as tree_handle:
        newick = tree_handle.read().replace("\n", "")
    td, tree = gwctree.treeParse(newick, 0)
    # Keep the nodes whose td entry has 'tip' at index 2.
    tips = [node for node in td if td[node][2] == 'tip']

gwctcore.logCheck(l, logfilename, gwctcore.getTime() + " | Starting codeml runs...\n")
if v == 0:
    # Only defined for v == 0.
    codeml_logfile = os.path.join(script_outdir, "codeml.log")
ctlfilename = "codeml.ctl"

# Progress counters.
i = 0
numbars = 0
donepercent = []
def splitThreads(arglist):
    """Run the convergence check over one batch of ancestral-probability files.

    Args:
        arglist: Packed arguments, read by index:
            [0] iterable of file names, resolved inside the module-level
                ``ancdir``; each must end its gene id with "-ancprobs.fa" or
                "_ancprobs.fa", and the matching "<gid>-anc.tre" /
                "<gid>_anc.tre" file is parsed as the gene tree.
            [1] ``orig_targets`` -- ``""`` to test every pair returned by
                ``getTargs``; any other value is used as the targets for every file.
            [2] ``p`` -- forwarded to ``getTargs``.
            [3] thread count -- not used by this function and no longer read.

    Returns:
        dict: result key -> accumulator. Each accumulator starts as three empty
        lists and the whole dict is passed through ``convergence.convCheck``.

    Raises:
        ValueError: if a file name contains neither "-ancprobs.fa" nor
            "_ancprobs.fa".

    Also reads the module-level names ``ancdir``, ``prob_thresh`` and ``pairwise``.
    """
    filelist_func = arglist[0]
    orig_targets = arglist[1]
    p = arglist[2]

    results_dict = {}
    for filename in filelist_func:
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print(filename)

        # The separator before "ancprobs.fa" decides the tree file's separator.
        if "-ancprobs.fa" in filename:
            gid = filename[:filename.index("-ancprobs.fa")]
            treefilename = os.path.join(ancdir, gid + "-anc.tre")
        else:
            gid = filename[:filename.index("_ancprobs.fa")]
            treefilename = os.path.join(ancdir, gid + "_anc.tre")

        infilename = os.path.join(ancdir, filename)
        # Context manager so the tree file is closed deterministically.
        with open(treefilename, "r") as tree_handle:
            tree = tree_handle.read().strip()
        tree_dict, _new_tree, _root = gt.treeParse(tree)

        if orig_targets != "":
            results_key = str(orig_targets)
            if results_key not in results_dict:
                results_dict[results_key] = [[], [], []]
            # Fresh copy per gene so convCheck cannot alter the caller's targets.
            targets = copy.deepcopy(orig_targets)
            results_dict = convergence.convCheck(infilename, results_dict, results_key, targets, prob_thresh, gid, tree_dict, pairwise)
        else:
            target_nodes = getTargs(tree_dict, p)
            for targets in target_nodes:
                # If one node is the ancestor of the other, skip this comparison.
                if tree_dict[targets[0]][1] == targets[1] or tree_dict[targets[1]][1] == targets[0]:
                    continue

                # Key is "<label of first node>-<label of second node>": the part
                # after the first "_" when the name has one, else tree_dict[n][3].
                node_key = ""
                for n in targets:
                    if "_" in n:
                        node_key += n[n.index("_") + 1:]
                    else:
                        node_key += tree_dict[n][3]
                    if n == targets[0]:
                        node_key += "-"

                if node_key not in results_dict:
                    results_dict[node_key] = [[], [], []]

                print(targets)
                # Each node is wrapped in its own one-element list before the
                # call to convCheck.
                targets = [[t] for t in targets]
                print(targets)
                results_dict = convergence.convCheck(infilename, results_dict, node_key, targets, prob_thresh, gid, tree_dict, pairwise)

    return results_dict
# Fragment of the top-level codeml setup. The leading statement and the `else:`
# that follows it belong to an `if` whose header lies before this span, and the
# body of the final `for` loop may continue past it.
# Steps visible here:
#   - report the chosen screen-output mode (-v 1 or -v 0) through gc.printWrite;
#   - list the input directory (indir) into filelist;
#   - create codemldir and ancdir by shelling out to `mkdir`
#     (NOTE(review): no existence check is made first and the paths are
#     concatenated into the command unquoted -- consider os.makedirs);
#   - when `prune` is set, parse the tree file and keep the nodes whose td entry
#     has 'tip' at index 2;
#   - when v == 0, build the codeml.stdout path inside outdir (presumably where
#     codeml output is redirected in silent mode -- confirm);
#   - initialise the loading-bar counters and start iterating over filelist,
#     updating the bar via gc.loadingBar when v == 0.
gc.printWrite(logfilename, " -> Printing all codeml output to the screen (-v 1)"); else: gc.printWrite(logfilename, " -> Silent mode. Not printing codeml output to the screen (-v 0)"); gc.printWrite(logfilename, "-------------------------------------"); # Print IO info to screen for user. filelist = os.listdir(indir); print "+ Creating codeml output directory:\t" + codemldir; os.system("mkdir " + codemldir); print "+ Creating directory to pass ancestral sequences and trees:\t" + ancdir; os.system("mkdir " + ancdir); # Create output directories. if prune: print " -> Retrieving tree info..."; td, tree, r = gt.treeParse(open(treefile, "r").read().replace("\n",""),0); tips = [node for node in td if td[node][2] == 'tip']; # Read tree info for pruning gc.printWrite(logfilename, gc.getTime() + " | Starting codeml runs...\n"); if v == 0: codeml_logfile = os.path.join(outdir, "codeml.stdout"); ctlfilename = "codeml.ctl"; i, numbars, donepercent, numfiles = 0, 0, [], len(filelist); # Loading bar stuff fa_skip = []; for cur_file in filelist: if v == 0: numbars, donepercent = gc.loadingBar(i, numfiles, donepercent, numbars);
def splitThreads(arglist):
    """Check one batch of alignment files for convergent sites.

    Args:
        arglist: Packed arguments, read by index:
            [0] iterable of file names, resolved inside the module-level ``indir``;
                names that do not contain ".fa" are skipped.
            [1] ``orig_targets`` -- ``""`` to test every pair returned by
                ``getTargs``; any other value is used as the targets for every file.
            [2] ``u`` -- when 1 the gene id is the file name up to ".fa" and no
                tree file is read; otherwise the gene id is the file name up to
                "_ancprobs.fa" and ``<gid>_anc.tre`` is parsed.
            [3] ``p`` -- forwarded to ``getTargs``.
            [4] thread count -- not used by this function and no longer read.

    Returns:
        dict: result key -> accumulator. Each accumulator starts as three empty
        lists and the whole dict is passed through ``convergence.convCheck``.

    Raises:
        ValueError: if ``u != 1`` and a file name lacks "_ancprobs.fa".

    Also reads the module-level names ``indir`` and ``prob_thresh``.
    """
    filelist_func = arglist[0]
    orig_targets = arglist[1]
    u = arglist[2]
    p = arglist[3]

    results_dict = {}
    for filename in filelist_func:
        if ".fa" not in filename:
            continue
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print(filename)

        if u != 1:
            gid = filename[:filename.index("_ancprobs.fa")]
        else:
            gid = filename[:filename.index(".fa")]
        gene = "_".join(gid.split("_")[:2])
        chr_start = gid.find("chr")
        chromosome = gid[chr_start:chr_start + 4]

        infilename = os.path.join(indir, filename)
        if u != 1:
            treefilename = os.path.join(indir, gid + "_anc.tre")
            # Context manager so the tree file is closed deterministically.
            with open(treefilename, "r") as tree_handle:
                tree = tree_handle.read().replace("\n", "")
            tree_dict, _new_tree = gwctree.treeParse(tree)
        # NOTE(review): tree_dict is only bound in the branch above, so with
        # u == 1 the uses below hit an unbound local. Intended behaviour for
        # that mode is not visible here -- confirm before changing.

        if orig_targets != "":
            results_key = str(orig_targets)
            if results_key not in results_dict:
                results_dict[results_key] = [[], [], []]
            # Fresh copy per gene so convCheck cannot alter the caller's targets.
            targets = copy.deepcopy(orig_targets)
            results_dict = convergence.convCheck(infilename, results_dict, results_key, targets, prob_thresh, chromosome, gene, tree_dict, u)
        else:
            target_nodes = getTargs(tree_dict, p)
            for targets in target_nodes:
                # If one node is the ancestor of the other, skip this comparison.
                if tree_dict[targets[0]][1] == targets[1] or tree_dict[targets[1]][1] == targets[0]:
                    continue

                # Key is "<label of first node>-<label of second node>": the part
                # after the first "_" when the name has one, else tree_dict[n][3].
                node_key = ""
                for n in targets:
                    if "_" in n:
                        node_key += n[n.index("_") + 1:]
                    else:
                        node_key += tree_dict[n][3]
                    if n == targets[0]:
                        node_key += "-"

                if node_key not in results_dict:
                    results_dict[node_key] = [[], [], []]
                results_dict = convergence.convCheck(infilename, results_dict, node_key, targets, prob_thresh, chromosome, gene, tree_dict, u)

    return results_dict