Exemplo n.º 1
0
def optParse(errorflag):
    """Parse and validate the command line options of the PASTA alignment script.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input_dir, seq_type, verbosity, output_dir), with seq_type
    lower-cased. Any other errorflag value yields None.
    """
    parser = argparse.ArgumentParser(
        description="Makes pasta alignments from an input directory. Dependencies: core, pasta")

    parser.add_argument("-i", dest="input_dir",
                        help="A directory containing multiple multi-FASTA files to be aligned.")
    parser.add_argument("-s", dest="seq_type", default="protein",
                        help="Specify the type of input sequences for PASTA. dna, rna, or protein. Default: protein")
    parser.add_argument("-v", dest="verbosity", type=int, default=1,
                        help="An option to control the output printed to the screen. -v 1: print all pasta output, -v 0: print only a progress bar. Default: 1")
    parser.add_argument("-o", dest="output_dir",
                        help="A directory to which the aligned sequences will be written.")

    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    # Input and output directories are mandatory.
    if opts.input_dir is None or opts.output_dir is None:
        parser.print_help()
        sys.exit()

    seq_type = opts.seq_type.lower()
    if seq_type not in ("dna", "rna", "protein"):
        core.errorOut(1, "-s must be entered exactly as dna, rna, or protein")
        optParse(1)

    if opts.verbosity not in (0, 1):
        core.errorOut(2, "-v must take values of either 0 or 1")
        optParse(1)

    return opts.input_dir, seq_type, opts.verbosity, opts.output_dir
Exemplo n.º 2
0
def optParse(errorflag):
    """Parse and validate the command line options of the sequence format converter.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input, input_format, output, output_format) in which each
        format is reduced to its first letter: "f", "p" or "n". Returns None
        for any other errorflag value.
    """
    parser = argparse.ArgumentParser(description="Converts sequence formats from one to another. Converts between any of FASTA, Phylip, and Nexus formats. Please note, this script assumes the file extensions of .fa, .ph, and .nex, respectively, for those formats. Dependencies: core")

    parser.add_argument("-i", dest="input", help="Input. Either a directory containing many sequence files or a single sequence file.")
    parser.add_argument("-f", dest="input_type", help="The format of the input sequences.")
    parser.add_argument("-o", dest="output", help="Output. Either the directory where files will be written or simply an output file name.")
    parser.add_argument("-t", dest="output_type", help="The desired output format of the sequences.")

    args = parser.parse_args()

    if errorflag == 0:
        # All four options are required. -f and -t are checked here as well,
        # because calling .lower() on an unset (None) value would otherwise
        # crash with an AttributeError instead of showing the usage message.
        required = (args.input, args.output, args.input_type, args.output_type)
        if any(value is None for value in required):
            parser.print_help()
            sys.exit()

        intype = args.input_type.lower()
        outtype = args.output_type.lower()

        valid_formats = ("fasta", "phylip", "nexus", "fa", "phy", "ph", "nex", "f", "p", "n")
        for t in (intype, outtype):
            if t not in valid_formats:
                core.errorOut(1, "-f and -t must take values of fasta, nexus, or phylip")
                optParse(1)

        # Every accepted spelling starts with the letter identifying the format.
        return args.input, intype[:1], args.output, outtype[:1]

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 3
0
def optParse(errflag):
    """Parse and validate the command line options of the job splitting script.

    Args:
        errflag: 0 to parse and validate the options; any other value prints
            the usage message and exits.

    Returns:
        A tuple (input, split_size, job_time, job_name, output).
    """
    parser = argparse.ArgumentParser(description="Splits a directory of files for running as multiple jobs on a HPC cluster.")

    parser.add_argument("-i", dest="input", help="Input directory containing all files on which your job will be run.")
    parser.add_argument("-s", dest="split_size", help="The # of files to place in each split. Default: 400", type=int, default=400)
    parser.add_argument("-t", dest="job_time", help="The job walltime in hours. Should be entered as simply an integer. Default: 60", type=int, default=60)
    parser.add_argument("-n", dest="job_name", help="The prefix for the job name.")
    parser.add_argument("-o", dest="output", help="An output directory in which the split directories will be placed. Default: [input directory].job/", default="")

    args = parser.parse_args()

    if errflag == 0:

        # The input directory and job name have no defaults.
        if args.input is None or args.job_name is None:
            parser.print_help()
            sys.exit()

        if args.split_size <= 0:
            core.errorOut(1, "-s can only take positive, non-zero values")
            optParse(1)

        if args.job_time <= 0:
            core.errorOut(2, "-t can only take positive, non-zero values")
            optParse(1)

        return args.input, args.split_size, args.job_time, args.job_name, args.output

    else:
        parser.print_help()
        # Parenthesised form: valid on Python 3 and prints the same text on Python 2.
        print("Exiting program.")
        sys.exit()
Exemplo n.º 4
0
def optParse(errorflag):
    """Parse and validate the command line options.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input_dir, alt_dir, null_dir, run_mode, output_file), or None
        for any other errorflag value.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-i", dest="input_dir", help="The directory containing your input genes.")
    parser.add_argument("-a", dest="alt_dir", help="The directory containing the PAML output from the alternate hypothesis.")
    parser.add_argument("-n", dest="null_dir", help="The directory containing the PAML output from the null hypothesis.")
    parser.add_argument("-m", dest="run_mode", help="This specifies which genes should be written to the output file: 0 = all genes, 1 = only the genes at the 1 percent significance level, 2 = only the genes at the 5 percent significance level, 3 = only the non-significant genes.", type=int, default=0)
    parser.add_argument("-o", dest="output_file", help="The prefix name of the output file. The suffix and extension (.txt) will be added based on -m.")

    args = parser.parse_args()

    if errorflag == 0:

        required = (args.input_dir, args.alt_dir, args.null_dir, args.output_file)
        if any(value is None for value in required):
            core.errorOut(1, "-i, -a, -n, and -o must all be defined")
            optParse(1)

        if args.run_mode not in [0, 1, 2, 3]:
            # The message lists the values that are actually accepted (0-3).
            core.errorOut(2, "-m must take values of 0, 1, 2, or 3")
            optParse(1)

        return args.input_dir, args.alt_dir, args.null_dir, args.run_mode, args.output_file

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 5
0
def optParse(errorflag):
    """Parse and validate the command line options of the muscle alignment script.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input_dir, verbosity, output_dir). Any other errorflag value
    yields None.
    """
    parser = argparse.ArgumentParser(
        description="Makes muscle alignments from an input directory. Dependencies: core, muscle")

    parser.add_argument("-i", dest="input_dir",
                        help="A directory containing multiple multi-FASTA files to be aligned.")
    parser.add_argument("-v", dest="verbosity", type=int, default=1,
                        help="An option to control the output printed to the screen. -v 1: print all muscle output, -v 0: print only a progress bar. Default: 1")
    parser.add_argument("-o", dest="output_dir",
                        help="A directory to which the aligned sequences will be written.")

    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    # Input and output directories are mandatory.
    if opts.input_dir is None or opts.output_dir is None:
        parser.print_help()
        sys.exit()

    if opts.verbosity not in (0, 1):
        core.errorOut(1, "-v must take values of either 0 or 1")
        optParse(1)

    return opts.input_dir, opts.verbosity, opts.output_dir
Exemplo n.º 6
0
def optParse(errorflag):
    """Parse the command line options of the ortholog list combiner.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input_files, output_file), where input_files is the -i value
    split on commas. Any other errorflag value yields None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", dest="input_list",
                        help="A comma delimited LIST of Ensembl ortholog lists to combine. The first column of each file must be the same species.")
    parser.add_argument("-o", dest="output_file",
                        help="Output file name for combined ortholog list.")
    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    missing = opts.input_list is None or opts.output_file is None
    if missing:
        core.errorOut(1, "Both -i and -o must be defined")
        optParse(1)

    input_files = opts.input_list.split(",")
    return input_files, opts.output_file
Exemplo n.º 7
0
def optParse(errflag):
    """Parse and validate the command line options of the job splitting script.

    Args:
        errflag: 0 to parse and validate the options; any other value prints
            the usage message and exits.

    Returns:
        A tuple (input, split_size, job_time, job_name, output).
    """
    parser = argparse.ArgumentParser(
        description=
        "Splits a directory of files for running as multiple jobs on a HPC cluster."
    )

    parser.add_argument(
        "-i",
        dest="input",
        help=
        "Input directory containing all files on which your job will be run.")
    parser.add_argument(
        "-s",
        dest="split_size",
        help="The # of files to place in each split. Default: 400",
        type=int,
        default=400)
    parser.add_argument(
        "-t",
        dest="job_time",
        help=
        "The job walltime in hours. Should be entered as simply an integer. Default: 60",
        type=int,
        default=60)
    parser.add_argument("-n",
                        dest="job_name",
                        help="The prefix for the job name.")
    parser.add_argument(
        "-o",
        dest="output",
        help=
        "An output directory in which the split directories will be placed. Default: [input directory].job/",
        default="")

    args = parser.parse_args()

    if errflag == 0:

        # The input directory and job name have no defaults.
        if args.input is None or args.job_name is None:
            parser.print_help()
            sys.exit()

        if args.split_size <= 0:
            core.errorOut(1, "-s can only take positive, non-zero values")
            optParse(1)

        if args.job_time <= 0:
            core.errorOut(2, "-t can only take positive, non-zero values")
            optParse(1)

        return args.input, args.split_size, args.job_time, args.job_name, args.output

    else:
        parser.print_help()
        # Parenthesised form: valid on Python 3 and prints the same text on Python 2.
        print("Exiting program.")
        sys.exit()
Exemplo n.º 8
0
def optParse(errorflag):
    """Parse and validate the command line options.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input_file, seq_dir, spec_entries, rem_start, output_dir),
        where spec_entries is the -d value split on commas. Returns None for
        any other errorflag value.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i",
        dest="input_file",
        help=
        "A file with tab delimited lists of orthologs (The output from orth_combine.py)."
    )
    parser.add_argument(
        "-s",
        dest="seq_dir",
        help=
        "A directory with full fasta sequences from all the species found in the ortholog list."
    )
    parser.add_argument(
        "-d",
        dest="spec_dict",
        help=
        "A necessary option to associate filename with species identifier... format exactly as follows for ALL species: \"spec1ID:spec1.fa,spec2ID:spec2.fa\""
    )
    parser.add_argument(
        "-m",
        dest="rem_start",
        help=
        "A boolean option to either remove the start Methionine (1) or not (0). Default: 0",
        type=int,
        default=0)
    parser.add_argument(
        "-o",
        dest="output_dir",
        help="Output directory where all combined sequences will be written.")

    args = parser.parse_args()

    if errorflag == 0:

        required = (args.input_file, args.output_dir, args.seq_dir, args.spec_dict)
        if any(value is None for value in required):
            core.errorOut(1, "-i, -o, -s, and -d must all be defined")
            optParse(1)

        if args.rem_start not in [0, 1]:
            core.errorOut(2, "-m must take values of either 0 or 1")
            # Abort like every other failed check; without this the invalid
            # value would be returned to the caller.
            optParse(1)

        return args.input_file, args.seq_dir, args.spec_dict.split(
            ","), args.rem_start, args.output_dir

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 9
0
def optParse(errorflag):
    """Parse the command line options of the ancestral site mapping script.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input_file, tree_file, site_num, output_file); output_file is
    the empty string when -o was not given. Any other errorflag value yields
    None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", dest="input_file",
                        help="A file containing a FASTA alignment with ancestral sequences from codeml.")
    parser.add_argument("-t", dest="tree_file",
                        help="The corresponding tree file from the codeml ancestral reconstructions.")
    parser.add_argument("-s", dest="site_num", type=int,
                        help="The site in the alignment that you wish to map to the tree.")
    parser.add_argument("-o", dest="output_file",
                        help="The name of an output file to write the new tree.")
    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    mandatory = (opts.input_file, opts.tree_file, opts.site_num)
    if any(value is None for value in mandatory):
        core.errorOut(1, "-i, -t, and -s must always be defined")
        optParse(1)

    # The output file is optional; an unset value is reported as "".
    out_name = "" if opts.output_file is None else opts.output_file

    return opts.input_file, opts.tree_file, opts.site_num, out_name
Exemplo n.º 10
0
def optParse(errorflag):
    """Parse and validate the command line options of the muscle alignment script.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input_dir, verbosity, output_dir). Any other errorflag value
    yields None.
    """
    parser = argparse.ArgumentParser(
        description="Makes muscle alignments from an input directory. Dependencies: core, muscle")

    parser.add_argument("-i", dest="input_dir",
                        help="A directory containing multiple multi-FASTA files to be aligned.")
    parser.add_argument("-v", dest="verbosity", type=int, default=1,
                        help="An option to control the output printed to the screen. -v 1: print all muscle output, -v 0: print only a progress bar. Default: 1")
    parser.add_argument("-o", dest="output_dir",
                        help="A directory to which the aligned sequences will be written.")

    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    # Input and output directories are mandatory.
    if opts.input_dir is None or opts.output_dir is None:
        parser.print_help()
        sys.exit()

    if opts.verbosity not in (0, 1):
        core.errorOut(1, "-v must take values of either 0 or 1")
        optParse(1)

    return opts.input_dir, opts.verbosity, opts.output_dir
Exemplo n.º 11
0
def optParse(errorflag):
    """Parse and validate the command line options of the convergence check script.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input_directory, num_spec, num_reps, conv_divergence,
        num_threads, output_file), or None for any other errorflag value.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-i", dest="input_directory", help="A directory containing FASTA formatted alignments you wish to check for convergence.")
    parser.add_argument("-s", dest="num_spec", help="The number of species to be chosen from for each replicate.", type=int, default=-1)
    parser.add_argument("-r", dest="num_reps", help="The number of times to perform the randomization test.", type=int, default=1)
    parser.add_argument("-d", dest="conv_divergence", help="Check for convergent divergent sites by setting this to 1.", type=int, default=0)
    # The help text names the options the divisibility check really uses (-r and -t).
    parser.add_argument("-t", dest="num_threads", help="Multiple random replicates can be run in parallel. This sets the number of threads to be used. The quotient of -r and -t MUST be a whole number. Default: 1", type=int, default=1)
    parser.add_argument("-o", dest="output_file", help="Output file name for convergent genes/sites.")

    args = parser.parse_args()

    if errorflag == 0:
        # NOTE(review): -s defaults to -1, so num_spec is never None here and
        # an omitted -s is not caught by this check -- confirm whether -1 is
        # meant to be a valid "not specified" value downstream.
        if args.input_directory is None or args.output_file is None or args.num_spec is None:
            core.errorOut(1, "Input (-i), output (-o), and the number of species (-s) must all be specified")
            optParse(1)

        if args.num_reps <= 0:
            core.errorOut(2, "The number of replicates (-r) must be a positive integer")
            optParse(1)

        if args.conv_divergence not in [0, 1]:
            core.errorOut(3, "-d must take values of either 0 or 1")
            optParse(1)

        if args.num_threads <= 0:
            core.errorOut(4, "The number of threads (-t) must be a positive integer")
            optParse(1)

        # Replicates must divide evenly among the threads.
        if (args.num_reps % args.num_threads) != 0:
            core.errorOut(5, "The quotient of -r and -t MUST be a whole number")
            optParse(1)

        return args.input_directory, args.num_spec, args.num_reps, args.conv_divergence, args.num_threads, args.output_file

    elif errorflag == 1:
        parser.print_help()
        print()
        sys.exit()
Exemplo n.º 12
0
def optParse(errorflag):
    """Parse and validate the command line options.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input_dir, alt_dir, null_dir, run_mode, output_file), or None
        for any other errorflag value.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-i", dest="input_dir", help="The directory containing your input genes.")
    parser.add_argument(
        "-a", dest="alt_dir", help="The directory containing the PAML output from the alternate hypothesis."
    )
    parser.add_argument(
        "-n", dest="null_dir", help="The directory containing the PAML output from the null hypothesis."
    )
    parser.add_argument(
        "-m",
        dest="run_mode",
        help="This specifies which genes should be written to the output file: 0 = all genes, 1 = only the genes at the 1 percent significance level, 2 = only the genes at the 5 percent significance level, 3 = only the non-significant genes.",
        type=int,
        default=0,
    )
    parser.add_argument(
        "-o",
        dest="output_file",
        help="The prefix name of the output file. The suffix and extension (.txt) will be added based on -m.",
    )

    args = parser.parse_args()

    if errorflag == 0:

        required = (args.input_dir, args.alt_dir, args.null_dir, args.output_file)
        if any(value is None for value in required):
            core.errorOut(1, "-i, -a, -n, and -o must all be defined")
            optParse(1)

        if args.run_mode not in [0, 1, 2, 3]:
            # The message lists the values that are actually accepted (0-3).
            core.errorOut(2, "-m must take values of 0, 1, 2, or 3")
            optParse(1)

        return args.input_dir, args.alt_dir, args.null_dir, args.run_mode, args.output_file

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 13
0
def optParse(errorflag):
    """Parse the command line options of the ancestral site mapping script.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input_file, tree_file, site_num, output_file); output_file is
    the empty string when -o was not given. Any other errorflag value yields
    None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", dest="input_file",
                        help="A file containing a FASTA alignment with ancestral sequences from codeml.")
    parser.add_argument("-t", dest="tree_file",
                        help="The corresponding tree file from the codeml ancestral reconstructions.")
    parser.add_argument("-s", dest="site_num", type=int,
                        help="The site in the alignment that you wish to map to the tree.")
    parser.add_argument("-o", dest="output_file",
                        help="The name of an output file to write the new tree.")
    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    mandatory = (opts.input_file, opts.tree_file, opts.site_num)
    if any(value is None for value in mandatory):
        core.errorOut(1, "-i, -t, and -s must always be defined")
        optParse(1)

    # The output file is optional; an unset value is reported as "".
    out_name = "" if opts.output_file is None else opts.output_file

    return opts.input_file, opts.tree_file, opts.site_num, out_name
Exemplo n.º 14
0
def optParse(errorflag):
    """Parse and validate the command line options of the GBlocks runner.

    errorflag -- 0: parse and validate the options and return them;
                 1: print the usage message and exit.

    Returns (input, gblocks_path, seq_type, run_mode, verbosity, log_opt),
    with seq_type reduced to its first lower-case letter. Any other errorflag
    value yields None.
    """
    parser = argparse.ArgumentParser(
        description="Runs GBlocks on a single .fa file or a directory full of .fa files. Dependencies: core, GBlocks")

    parser.add_argument("-i", dest="input",
                        help="Input. Either a directory containing many FASTA files or a single FASTA file.")
    parser.add_argument("-r", dest="gblocks_path", default="gblocks",
                        help="You can specify the full path to your GBlocks executable here. Default: gblocks (assumes you either have an alias or it is in your PATH.")
    parser.add_argument("-t", dest="seq_type", default="p",
                        help="Choose from: protein (p, default), dna, (d), or codon (c).")
    parser.add_argument("-m", dest="run_mode", type=int, default=1,
                        help="Run mode. 1: for phylogenetic reconstructions, accepts only masks that are < 20 percent of original file. 2: Conservative, default GBlocks settings. Default: 1")
    parser.add_argument("-v", dest="verbosity", type=int, default=1,
                        help="An option to control the output printed to the screen. 1: print all GBlocks output, 0: print only a progress bar. Default: 1")
    parser.add_argument("-l", dest="log_opt", type=int, default=1,
                        help="A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1")

    opts = parser.parse_args()

    # Usage-only mode: reached through the recursive call after an error.
    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    if opts.input is None:
        parser.print_help()
        sys.exit()

    seq_code = opts.seq_type.lower()
    if seq_code not in ("p", "d", "c", "protein", "dna", "codon"):
        core.errorOut(1, "-t must take values of p, d, or c")
        optParse(1)

    # Long spellings collapse to their first letter; one-letter codes are unchanged.
    seq_code = seq_code[:1]

    if opts.run_mode not in (1, 2):
        core.errorOut(2, "-m must take values of either 1 or 2")
        optParse(1)

    if opts.verbosity not in (0, 1):
        core.errorOut(3, "-v must take values of either 1 or 0")
        optParse(1)

    if opts.log_opt not in (0, 1):
        core.errorOut(4, "-l must take values of either 1 or 0")
        optParse(1)

    return opts.input, opts.gblocks_path, seq_code, opts.run_mode, opts.verbosity, opts.log_opt
Exemplo n.º 15
0
def optParse(errorflag):
    """Parse and validate the command line options of the convergence check script.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input_directory, num_spec, num_reps, conv_divergence,
        num_threads, output_file), or None for any other errorflag value.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i",
        dest="input_directory",
        help=
        "A directory containing FASTA formatted alignments you wish to check for convergence."
    )
    parser.add_argument(
        "-s",
        dest="num_spec",
        help="The number of species to be chosen from for each replicate.",
        type=int,
        default=-1)
    parser.add_argument(
        "-r",
        dest="num_reps",
        help="The number of times to perform the randomization test.",
        type=int,
        default=1)
    parser.add_argument(
        "-d",
        dest="conv_divergence",
        help="Check for convergent divergent sites by setting this to 1.",
        type=int,
        default=0)
    # The help text names the options the divisibility check really uses (-r and -t).
    parser.add_argument(
        "-t",
        dest="num_threads",
        help=
        "Multiple random replicates can be run in parallel. This sets the number of threads to be used. The quotient of -r and -t MUST be a whole number. Default: 1",
        type=int,
        default=1)
    parser.add_argument("-o",
                        dest="output_file",
                        help="Output file name for convergent genes/sites.")

    args = parser.parse_args()

    if errorflag == 0:
        # NOTE(review): -s defaults to -1, so num_spec is never None here and
        # an omitted -s is not caught by this check -- confirm whether -1 is
        # meant to be a valid "not specified" value downstream.
        if args.input_directory is None or args.output_file is None or args.num_spec is None:
            core.errorOut(
                1,
                "Input (-i), output (-o), and the number of species (-s) must all be specified"
            )
            optParse(1)

        if args.num_reps <= 0:
            core.errorOut(
                2, "The number of replicates (-r) must be a positive integer")
            optParse(1)

        if args.conv_divergence not in [0, 1]:
            core.errorOut(3, "-d must take values of either 0 or 1")
            optParse(1)

        if args.num_threads <= 0:
            core.errorOut(
                4, "The number of threads (-t) must be a positive integer")
            optParse(1)

        # Replicates must divide evenly among the threads.
        if (args.num_reps % args.num_threads) != 0:
            core.errorOut(5,
                          "The quotient of -r and -t MUST be a whole number")
            optParse(1)

        return args.input_directory, args.num_spec, args.num_reps, args.conv_divergence, args.num_threads, args.output_file

    elif errorflag == 1:
        parser.print_help()
        print()
        sys.exit()
Exemplo n.º 16
0
Arquivo: fa_edit.py Projeto: gwct/core
def optParse(errorflag):
    """Parse and validate the command line options of the FASTA editing script.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input, relabel_opt, sd, seq_keep, trim_opt, trim_delim,
        ss_opt, replacement, output). sd is a dict built from the -s entries
        when -r is 1 and "" otherwise. replacement is "" when -m was not
        given, otherwise a list of upper-cased two-character strings.
        Returns None for any other errorflag value.
    """
    parser = argparse.ArgumentParser(description="A general purpose FASTA editing script.")

    parser.add_argument("-i", dest="input", help="A directory containing FASTA formatted files or a single FASTA file.")
    parser.add_argument("-r", dest="relabel_opt", help="Option to tell the script whether to relabel the FASTA headers (1,2,3) or not (0). 1: Replace header completely. 2: Add new header to beginning of old header. 3: i5k. Default: 1", type=int, default=1)
    parser.add_argument("-j", dest="seq_keep", help="A comma delimited list of sequence IDs to remove from each file.", default="")
    parser.add_argument("-s", dest="spec_dict", help="A string formatted as a Python dictionary with the current species ID as the key and the label to add to the beginning of the FASTA header as the value. Must be provided if -r set to 1.")
    parser.add_argument("-t", dest="trim_opt", help="Boolean to tell the script whether to trim the FASTA headers (1) or not (0). Default: 0", type=int, default=0)
    parser.add_argument("-d", dest="trim_delim", help="The character string at which to trim the FASTA headers if -t is set to 1. Default: \" \"", default=" ")
    parser.add_argument("-p", dest="ss_opt", help="Boolean to tell the script whether to remove start and stops from the alignment (1) or not (0). Default: 0", type=int, default=0)
    parser.add_argument("-m", dest="replacement", help="This option will replace all characters in each sequence with another character. For example, AB will replace all As with Bs. If the input is an alignment, if A: is entered, all As will be replaced with another AA that is not present in the column. For multiple replacements, enter as: AB,CD,EF", default="")
    parser.add_argument("-o", dest="output", help="The directory or file to which the relabeled and/or trimmed FASTA sequences are written.")

    args = parser.parse_args()

    if errorflag == 0:
        if args.input is None or args.output is None:
            parser.print_help()
            sys.exit()

        if args.relabel_opt not in [0, 1, 2, 3]:
            core.errorOut(1, "-r must take values of 0, 1, 2, or 3")
            optParse(1)

        elif args.relabel_opt == 1:
            if args.spec_dict is None:
                core.errorOut(2, "With -r set to 1 or 2, -s must also be specified")
                optParse(1)
            else:
                # -s is given as "id1:label1,id2:label2".
                sd = {}
                for entry in args.spec_dict.split(","):
                    pair = entry.split(":")
                    if len(pair) < 2:
                        # An entry without ":" used to crash with IndexError.
                        core.errorOut(2, "Each -s entry must be formatted as ID:label")
                        optParse(1)
                    sd[pair[0]] = pair[1]
        else:
            sd = ""

        if args.trim_opt not in [0, 1]:
            core.errorOut(3, "-t must take values of either 0 or 1")
            optParse(1)

        if args.ss_opt not in [0, 1]:
            core.errorOut(4, "-p must take values of either 0 or 1")
            optParse(1)

        replacement = args.replacement
        if replacement != "":
            replacement = replacement.upper()

            if len(replacement) > 2 and "," not in replacement:
                core.errorOut(1, "-m entered incorrectly")
                optParse(1)

            elif "," in replacement:
                replacement = replacement.split(",")

            else:
                replacement = [replacement]

            for each in replacement:
                # Every entry must be exactly two characters. Testing the
                # length first keeps each[1] from raising IndexError on a
                # one-character or empty entry (e.g. "-m A" or "-m AB,").
                # aas is a module-level name defined outside this function.
                if len(each) != 2 or (each[1] not in aas and each[1] != ":"):
                    core.errorOut(1, "For -m, the second character entered must be a valid amino acid symbol or ':'")
                    optParse(1)

        return args.input, args.relabel_opt, sd, args.seq_keep, args.trim_opt, args.trim_delim, args.ss_opt, replacement, args.output

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 17
0
def optParse(errorflag):
    """Parse and validate the command line options of the codeml runner.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input, paml_path, tree_file, prune_opt, paml_seqtype,
        branch_site, anc_opt, verbosity, log_opt, logdir_suffix), or None for
        any other errorflag value.
    """
    parser = argparse.ArgumentParser(
        description=
        "Runs codeml on a single .fa file or a directory full of .fa files. Dependencies: core, treeparse, PAML, newickutils"
    )

    parser.add_argument(
        "-i",
        dest="input",
        help=
        "Input. Either a directory containing many FASTA files or a single FASTA file."
    )
    parser.add_argument(
        "-c",
        dest="paml_path",
        help="You must specify the full path to your PAML DIRECTORY here.")
    parser.add_argument(
        "-t",
        dest="tree_file",
        help=
        "A user specified tree for codeml to use. If not specified, codeml will infer the tree.",
        default="")
    parser.add_argument(
        "-p",
        dest="prune_opt",
        help=
        "If not all species present in the tree will be present in each alignment, set this to 1 to prune the tree for each file. Default: 0",
        type=int,
        default=0)
    parser.add_argument(
        "-s",
        dest="paml_seqtype",
        help=
        "The seqtype for codeml to use. 1 (default): Codons; 2: Amino Acids",
        type=int,
        default=1)
    parser.add_argument(
        "-b",
        dest="branch_site",
        help=
        "Specifies the type of run for PAML's branch site test. 0 (default): Do not do branch site test; 1: Do the null model of the branch site test (model=2, NSsite=2, fix_omega=1, omega=1); 2: Do the alternate model of the branch site test (model=2, NSsite=2, fix_omega=0, omega=1). A branch must be specified in your tree file.",
        type=int,
        default=0)
    parser.add_argument(
        "-a",
        dest="anc_opt",
        help=
        "Option to tell PAML to do ancestral reconstruction (1) or not (0). Default: 0.",
        type=int,
        default=0)
    parser.add_argument(
        "-v",
        dest="verbosity",
        help=
        "An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1",
        type=int,
        default=1)
    parser.add_argument(
        "-l",
        dest="log_opt",
        help=
        "A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1",
        type=int,
        default=1)
    parser.add_argument(
        "-x",
        dest="logdir_suffix",
        help="A string to add on to the end of the output directory.",
        default="")

    args = parser.parse_args()

    if errorflag == 0:

        if args.input is None or args.paml_path is None:
            core.errorOut(1, "Both -i and -c must be set")
            optParse(1)

        # -t is optional and defaults to "". Only a tree that was actually
        # given is checked on disk; os.path.isfile("") is always False, which
        # used to reject every run that left -t out.
        if args.tree_file != "" and not os.path.isfile(args.tree_file):
            core.errorOut(2, "-t must be a valid tree file name")
            optParse(1)

        if args.prune_opt not in [0, 1]:
            core.errorOut(3, "-p must take values of either 1 or 0")
            optParse(1)

        if args.prune_opt == 1 and args.tree_file == "":
            core.errorOut(4, "With -p set to 1 a tree file must be specified")
            optParse(1)

        if args.paml_seqtype not in [1, 2]:
            core.errorOut(5, "-s must take values of either 1 or 2")
            optParse(1)

        if args.branch_site not in [0, 1, 2]:
            core.errorOut(6, "-b must take values of 0, 1, or 2")
            optParse(1)

        if args.anc_opt not in [0, 1]:
            core.errorOut(7, "-a must take values of 1 or 0")
            optParse(1)

        if args.verbosity not in [0, 1]:
            core.errorOut(8, "-v must take values of either 1 or 0")
            optParse(1)

        if args.log_opt not in [0, 1]:
            core.errorOut(9, "-l must take values of either 1 or 0")
            optParse(1)

        return args.input, args.paml_path, args.tree_file, args.prune_opt, args.paml_seqtype, args.branch_site, args.anc_opt, args.verbosity, args.log_opt, args.logdir_suffix

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 18
0
def optParse(errorflag):
    """Parse and validate the command line options of the RAxML runner.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit (used after a validation error was reported).

    Returns:
        A tuple (input, raxml_path, raxml_model, bootstrap_reps, num_threads,
        verbosity, constraint_tree, estimate_bl, output_dir, log_opt), or None
        for any other errorflag value.
    """
    parser = argparse.ArgumentParser(
        description=
        "Runs RAxML on a single .fa file or a directory full of .fa files. Dependencies: core, RAxML"
    )

    parser.add_argument(
        "-i",
        dest="input",
        help=
        "Input. Either a directory containing many FASTA files or a single FASTA file."
    )
    parser.add_argument(
        "-r",
        dest="raxml_path",
        help=
        "You can specify the full path to your RAxML executable here. Default: raxml (assumes you either have an alias or it is in your PATH.",
        default="raxml")
    parser.add_argument("-m",
                        dest="raxml_model",
                        help="The DNA or AA model you wish RAxML to use.")
    parser.add_argument(
        "-b",
        dest="bootstrap_reps",
        help=
        "The number of bootstrap replicates you wish RAxML to run with its rapid bootstrapping algorithm. Default: 0",
        type=int,
        default=0)
    parser.add_argument(
        "-t",
        dest="num_threads",
        help=
        "The number of threads you wish to use for the analysis. Default: 1",
        type=int,
        default=1)
    parser.add_argument(
        "-v",
        dest="verbosity",
        help=
        "An option to control the output printed to the screen. 1: print all RAxML output, 0: print only a progress bar. Default: 1",
        type=int,
        default=1)
    parser.add_argument(
        "-c",
        dest="constraint_tree",
        help=
        "A file containing a constraint tree to be used with RAxML's -g option."
    )
    parser.add_argument(
        "--bl",
        dest="estimate_bl",
        help=
        "Use with -c to set RAxML to '-f e' to estimate branch lengths only on the constraint tree",
        action="store_true")
    parser.add_argument(
        "-o",
        dest="output_dir",
        help=
        "The name of the output directory for this run. Default: [datetime]-run_raxml",
        default="")
    parser.add_argument(
        "-l",
        dest="log_opt",
        help=
        "A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1",
        type=int,
        default=1)

    args = parser.parse_args()

    if errorflag == 0:
        # The input and the substitution model have no defaults.
        if args.input is None or args.raxml_model is None:
            parser.print_help()
            sys.exit()

        if args.bootstrap_reps < 0:
            core.errorOut(1, "-b can take only positive values")
            optParse(1)

        if args.bootstrap_reps > 100:
            # Warning only; the run continues. The parenthesised print form is
            # valid on Python 3 and prints the same text on Python 2.
            print(" ---------------------------------------------------------------------------------------------------")
            print("|*Warning: You have specified more than 100 bootstrap replicates. This could take a very long time. |")
            print(" ---------------------------------------------------------------------------------------------------")

        if args.num_threads <= 0:
            core.errorOut(2, "-t can take only positive, non-zero values")
            optParse(1)

        if args.verbosity not in [0, 1]:
            core.errorOut(3, "-v must take values of either 1 or 0")
            optParse(1)

        if args.constraint_tree is not None and not os.path.exists(
                args.constraint_tree):
            core.errorOut(4, "Cannot find constraint tree (-c) file!")
            optParse(1)

        # --bl only makes sense together with a constraint tree.
        if args.estimate_bl and args.constraint_tree is None:
            core.errorOut(
                5, "With --bl set, a constraint tree must also be set with -c")
            optParse(1)

        if args.log_opt not in [0, 1]:
            core.errorOut(6, "-l must take values of either 1 or 0")
            optParse(1)

        return args.input, args.raxml_path, args.raxml_model, args.bootstrap_reps, args.num_threads, args.verbosity, args.constraint_tree, args.estimate_bl, args.output_dir, args.log_opt

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 19
0
def optParse(errorflag):
    """Parse and validate the command line options of the FASTA editing script.

    errorflag -- 0: parse and validate the options and return them.
                 1: print the usage message and exit (called after a
                    validation error has been reported).

    Returns the tuple (input, relabel_opt, new_label, seq_keep, trim_opt,
    trim_delim, ss_opt, replacement, output). replacement is "" when -m was
    not given, otherwise a list of upper-cased two-character strings.

    Every validation failure is reported through core.errorOut() and followed
    by optParse(1), which prints the help text and calls sys.exit().
    """

    parser = argparse.ArgumentParser(
        description="A general purpose FASTA editing script.")

    parser.add_argument(
        "-i",
        dest="input",
        help=
        "A directory containing FASTA formatted files or a single FASTA file.")
    # NOTE(review): the help text mentions mode 4 (i5k) but the validation
    # below only accepts 0-3 -- confirm which one is intended.
    parser.add_argument(
        "-r",
        dest="relabel_opt",
        help=
        "Option to tell the script whether to relabel the FASTA headers (1,2,3) or not (0). 1: Replace header completely. 2: Add new header to beginning of old header. 3: Add new header to end of old header. 4: i5k. Default: 1",
        type=int,
        default=1)
    parser.add_argument("-l",
                        dest="new_label",
                        help="The new header. Must be specified with -r set.")
    parser.add_argument(
        "-j",
        dest="seq_keep",
        help="A comma delimited list of sequence IDs to remove from each file.",
        default="")
    # -s is parsed but not validated or returned by this function.
    parser.add_argument(
        "-s",
        dest="spec_dict",
        help=
        "A string formatted as a Python dictionary with the current species ID as the key and the label to add to the beginning of the FASTA header as the value. Must be provided if -r set to 1."
    )
    parser.add_argument(
        "-t",
        dest="trim_opt",
        help=
        "Boolean to tell the script whether to trim the FASTA headers (1) or not (0). Default: 0",
        type=int,
        default=0)
    parser.add_argument(
        "-d",
        dest="trim_delim",
        help=
        "The character string at which to trim the FASTA headers if -t is set to 1. Default: \" \"",
        default=" ")
    parser.add_argument(
        "-p",
        dest="ss_opt",
        help=
        "Boolean to tell the script whether to remove start and stops from the alignment (1) or not (0). Default: 0",
        type=int,
        default=0)
    parser.add_argument(
        "-m",
        dest="replacement",
        help=
        "This option will replace all characters in each sequence with another character. For example, AB will replace all As with Bs. If the input is an alignment, if A: is entered, all As will be replaced with another AA that is not present in the column. For multiple replacements, enter as: AB,CD,EF",
        default="")
    parser.add_argument(
        "-o",
        dest="output",
        help=
        "The directory or file to which the relabeled and/or trimmed FASTA sequences are written."
    )

    args = parser.parse_args()

    if errorflag == 1:
        parser.print_help()
        sys.exit()
    if errorflag != 0:
        # Any other flag value falls through without a result, as before.
        return None

    if args.input is None or args.output is None:
        parser.print_help()
        sys.exit()

    if args.relabel_opt not in [0, 1, 2, 3]:
        core.errorOut(1, "-r must take values of 0, 1, 2, or 3")
        optParse(1)

    # Modes 1, 2 and 3 all insert the new header, so each of them needs -l
    # (previously only mode 1 was checked although the message names all three).
    if args.relabel_opt in [1, 2, 3] and args.new_label is None:
        core.errorOut(
            2, "With -r set to 1, 2, or 3, -l must also be specified")
        optParse(1)

    if args.trim_opt not in [0, 1]:
        core.errorOut(3, "-t must take values of either 0 or 1")
        optParse(1)

    if args.ss_opt not in [0, 1]:
        core.errorOut(4, "-p must take values of either 0 or 1")
        optParse(1)

    replacement = args.replacement
    if replacement != "":
        replacement = replacement.upper()

        # More than one pair must be comma separated.
        if len(replacement) > 2 and "," not in replacement:
            core.errorOut(1, "-m entered incorrectly")
            optParse(1)

        # split() yields a one-element list when there is no comma.
        replacement = replacement.split(",")

        for each in replacement:
            # Each entry must be exactly two characters; checking the length
            # first avoids an IndexError on entries such as "A" or "".
            # aas is a module-level name defined outside this function
            # (presumably the valid amino acid symbols -- confirm).
            if len(each) != 2 or (each[1] not in aas and each[1] != ":"):
                core.errorOut(
                    1,
                    "For -m, the second character entered must be a valid amino acid symbol or ':'"
                )
                optParse(1)

    return args.input, args.relabel_opt, args.new_label, args.seq_keep, args.trim_opt, args.trim_delim, args.ss_opt, replacement, args.output
Exemplo n.º 20
0
#	if lopt == 1:
#		core.printWrite(lfilename, outline);
#	else:
#		print outline;

############################################
#Main Block
############################################

# Read the validated command line options; optParse(0) returns them as a
# ten-element tuple.
ins, ppath, treefile, prune, seqtype, bsopt, aopt, v, l, outdir_suffix = optParse(
    0)

# Log timestamp; also passed to core.getOutdir() below.
starttime = core.getLogTime()

# -c has to be an existing directory; on failure the error is reported and
# optParse(1) is called to show the usage.
if not os.path.isdir(ppath):
    core.errorOut(10, "-c must be a valid directory path")
    optParse(1)
# The codeml executable is looked up under <ppath>/bin/codeml.
codeml_path = os.path.join(ppath, "bin", "codeml")
# A non-empty suffix is prefixed with "-".
# NOTE(review): if optParse returns None rather than "" when no suffix is
# given, the concatenation below raises a TypeError -- confirm the default.
if outdir_suffix != "":
    outdir_suffix = "-" + outdir_suffix

# Single-file input: the output directory is derived from the directory that
# contains the input file.
if os.path.isfile(ins):
    fileflag = 1
    indir = os.path.dirname(os.path.realpath(ins))
    indir, script_outdir = core.getOutdir(indir, "run_codeml", starttime)
    outdir = os.path.join(script_outdir, "codeml_out")
    # The ancestral sequence directory is only defined when aopt is 1.
    if aopt == 1:
        ancdir = os.path.join(script_outdir, "anc_seqs_fa")
    filelist = [ins]

else:
Exemplo n.º 21
0
    outfile.write(cmd)

    outfile.close()

    return scriptname


############################################
#Main Block
############################################

# Fetch the validated command line options.
indir, s, jobtime, jobprefix, outdir = optParse(0)

# The input directory must exist; it is stored as an absolute path with a
# trailing slash.
if os.path.isdir(indir):
    indir = os.path.abspath(indir) + "/"
else:
    core.errorOut(3, "-i must be a valid directory path")
    optParse(1)

# Without -o the output goes next to the input directory as "<indir>.job/";
# an explicit -o has to be an existing directory.
if outdir == "":
    outdir = indir[:-1] + ".job/"
elif not os.path.isdir(outdir):
    core.errorOut(4, "-o must be a valid directory path")
    optParse(1)

# Run summary banner.
banner = (
    "=======================================================================",
    "INPUT  | Splitting files in:\t\t" + indir,
    "INFO   | Number of files per split:\t" + str(s),
    "OUTPUT | Output directory:\t\t" + outdir,
    "-------------------------------------",
)
print("\n".join(banner))
Exemplo n.º 22
0
		cmd = "time -p python2.7 /N/u/grthomas/Mason/bin/gwct/gwct_codeml.py -i " + joboutdir + " -t /N/dc2/scratch/grthomas/marine_june/multiz_mm59.tre -s 2 -p 1 -a 1 -c /N/u/grthomas/Karst/bin/grand-conv/";

		outfile.write(cmd);

		outfile.close();

		return scriptname;

############################################
#Main Block
############################################

# Fetch the validated command line options.
indir, s, jobtime, jobprefix, outdir = optParse(0)

# The input directory must exist; it is stored as an absolute path with a
# trailing slash.
if os.path.isdir(indir):
	indir = os.path.abspath(indir) + "/"
else:
	core.errorOut(3, "-i must be a valid directory path")
	optParse(1)

# Without -o the output goes next to the input directory as "<indir>.job/";
# an explicit -o has to be an existing directory.
if outdir == "":
	outdir = indir[:-1] + ".job/"
elif not os.path.isdir(outdir):
	core.errorOut(4, "-o must be a valid directory path")
	optParse(1)

# Run summary banner.
summary = [
	"=======================================================================",
	"INPUT  | Splitting files in:\t\t" + indir,
	"INFO   | Number of files per split:\t" + str(s),
	"OUTPUT | Output directory:\t\t" + outdir,
	"-------------------------------------",
]
print("\n".join(summary))
Exemplo n.º 23
0
                    dest="ref",
                    help="One of rat, mouse, or mouse-targets",
                    default="mouse")
args = parser.parse_args()
# Input options.

# Project-wide lookup tables supplied by globs.get().
seq_run_ids, spec_ids, specs_ordered, spec_abbr, basedirs = globs.get()

# "all" selects every species in specs_ordered; otherwise the comma separated
# list is split (a space after each comma is tolerated) and every entry must
# be present in spec_ids.
if args.spec == "all":
    spec = specs_ordered
else:
    spec = args.spec.replace(", ", ",").split(",")
    for s in spec:
        if s not in spec_ids:
            # Exit with whatever core.errorOut() returns.
            sys.exit(
                core.errorOut("SF2", "Cannot find specified species: " + s))
# Parse the input species.

# The reference name is compared case-insensitively.
# NOTE(review): the option's help text also lists "mouse-targets", which this
# check rejects -- confirm which one is intended.
args.ref = args.ref.lower()
if args.ref not in ['rat', 'mouse']:
    sys.exit(
        core.errorOut("M1", "-ref must be one of either 'rat' or 'mouse'."))
# Hard-coded input directory and depth/mapping tables per reference.
if args.ref == 'mouse':
    indir = "/scratch/gregg_thomas/Murinae-seq/03B-MappedMouse/"
    targetfile = "/scratch/gregg_thomas/Murinae-seq/data/mouse-target-depth.csv"
    tilefile = "/scratch/gregg_thomas/Murinae-seq/data/mouse-tile-depth.csv"
    mapfile = "/scratch/gregg_thomas/Murinae-seq/data/mouse-mapped.csv"
elif args.ref == 'rat':
    indir = "/scratch/gregg_thomas/Murinae-seq/03B-MappedRat/"
    targetfile = "/scratch/gregg_thomas/Murinae-seq/data/rat-target-depth.csv"
    tilefile = "/scratch/gregg_thomas/Murinae-seq/data/rat-tile-depth.csv"
Exemplo n.º 24
0
# With the carnation flag set, every base directory is prefixed with
# "/nfs/musculus".
if args.carnation:
    basedirs = ["/nfs/musculus" + d for d in basedirs]

# Build runtype, the list of integer run ids to process. "all" selects ids
# 0-10; otherwise each comma separated entry is either a key of seq_run_ids
# (mapped to its id) or one of the literal numbers "0".."10".
if args.runtype == "all":
    runtype = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
else:
    runtype = []
    args.runtype = args.runtype.replace(", ", ",").split(",")
    for r in args.runtype:
        if r in seq_run_ids:
            runtype.append(seq_run_ids[r])
        elif r in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]:
            runtype.append(int(r))
        else:
            # Unknown run: exit with whatever core.errorOut() returns.
            sys.exit(
                core.errorOut(
                    "CP1", "Cannot find specified sequencing run: " + str(r)))
# Parse the input runtypes.

# Reverse lookup: map every selected run id back to its string name.
# args.runtype is overwritten on every match, so it ends up holding the name
# of the last matched run only.
runstrs = {}
for r in runtype:
    for runstr, runind in seq_run_ids.items():
        if runind == r:
            runstrs[r] = runstr
            #runstrs.append(runstr);
            args.runtype = runstr
# Get the string run type if int is given as input.
#print(runstrs);
#print(runtype);

# "all" selects every species in specs_ordered.
if args.spec == "all":
    spec = specs_ordered
Exemplo n.º 25
0
    0)

# Log timestamp; also passed to core.getOutdir() below.
starttime = core.getLogTime()

# Split the input into its directory (with trailing slash) and bare file name.
# A name without any "/" is taken relative to the current working directory.
if infilename.find("/") != -1:
    indir = os.path.dirname(os.path.realpath(infilename)) + "/"
    infilename = infilename[infilename.rfind("/") + 1:]
else:
    indir = os.getcwd() + "/"

indir, script_outdir = core.getOutdir(indir, "supertreemaker", starttime)
print(script_outdir)
print(os.path.basename(os.path.normpath(script_outdir)))
# -z relocates the output directory: only its final path component is kept
# and placed under the user supplied directory.
if script_outdir_initial is not None:
    if not os.path.isdir(script_outdir_initial):
        core.errorOut(8, "-z must be a valid directory")
        optParse(1)

    script_outdir = os.path.join(
        script_outdir_initial,
        os.path.basename(os.path.normpath(script_outdir)))
# -x appends "-<suffix>" to the directory name (before the trailing slash).
if outdir_suffix is not None:
    if script_outdir.endswith("/"):
        script_outdir = script_outdir[:-1]
    script_outdir = script_outdir + "-" + outdir_suffix + "/"

# os.path.join() above drops the trailing slash, so -z without -x used to
# yield a path without one while every other branch ends in "/". Normalise so
# the directory name always ends in a slash.
if not script_outdir.endswith("/"):
    script_outdir = script_outdir + "/"

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory directly instead of through a shell command string:
# a quote in the user supplied path would otherwise break (or inject into)
# the "mkdir '...'" command. An already existing directory is left alone.
if not os.path.isdir(script_outdir):
    os.mkdir(script_outdir)
Exemplo n.º 26
0
def optParse(errorflag):
    """Parse and validate the command line options of the supertree script.

    errorflag -- 0: parse and validate the options and return them.
                 1: print the usage message and exit (called after a
                    validation error has been reported).

    Returns the tuple (input_file, r_output_file, nj_opt, nj_outgroup,
    reroot_opt, div_est_opt, r8s_output_file, num_sites, cal_specs, cal_age,
    log_opt, script_logdir, logdir_suffix). cal_specs and cal_age are lists of
    strings when -d is 1 and None otherwise; r8s_output_file and num_sites are
    also forced to None when -d is 0.

    Every validation failure is reported through core.errorOut() and followed
    by optParse(1), which prints the help text and calls sys.exit().
    """

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-i",
        dest="input_file",
        help=
        "A file containing a list of trees on which to run SDM and NJ OR a file containing a single tree on which to run r8s."
    )
    parser.add_argument(
        "-r",
        dest="r_output_file",
        help="A file name for R to write the Neighbor Joining tree.")

    parser.add_argument(
        "-j",
        dest="nj_opt",
        help=
        "A boolean option to use SDM to create a consensus matrix and R to create a NJ tree. Default: 0.",
        type=int,
        default=0)
    parser.add_argument(
        "-o",
        dest="nj_outgroup",
        help="The outgroup by which the NJ tree will be rooted.")
    parser.add_argument(
        "-t",
        dest="reroot_opt",
        help=
        "Boolean to reroot (1) the NJ tree or not (0). If set to 1, -o must also be specified. Default: 0",
        type=int,
        default=0)

    parser.add_argument(
        "-d",
        dest="div_est_opt",
        help=
        "A boolean option to estimate divergence times from the NJ tree with r8s (1) or not (0). Default: 0.",
        type=int,
        default=0)
    parser.add_argument("-e",
                        dest="r8s_output_file",
                        help="A file name for r8s to write the final output.")
    parser.add_argument(
        "-n",
        dest="num_sites",
        help=
        "The total number of sites from the alignments used to make the tree; used by r8s."
    )
    parser.add_argument(
        "-s",
        dest="cal_specs",
        help=
        "A list of PAIRS of species that define nodes you wish to constrain times on. Species within a pair should be separated by a comma, pairs should be separated by a space (eg 'pair1s1,pair1s2 pair2s1,pair2s2')."
    )
    parser.add_argument(
        "-a",
        dest="cal_age",
        help=
        "The calibration ages of the nodes defined by the species in -s. The order of this list corresponds to the order of -s. Separate ages by commas. If constraints are to be used the keywords min and/or max are used with hyphens (eg '324,min-99.9-max-121' defines one fixed age of 324 and one constrained age)."
    )
    parser.add_argument(
        "-l",
        dest="log_opt",
        help=
        "A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1",
        type=int,
        default=1)

    parser.add_argument(
        "-z",
        dest="script_logdir",
        help=
        "A directory in which to place the script output directory. If none is specified, this will default to the directory of the input file"
    )
    parser.add_argument(
        "-x",
        dest="logdir_suffix",
        help="A string to add on to the end of the output directory.")

    args = parser.parse_args()

    if errorflag == 0:
        # -i is mandatory and both mode switches must be 0 or 1.
        if args.input_file is None or args.nj_opt not in [
                0, 1
        ] or args.div_est_opt not in [0, 1]:
            core.errorOut(
                1,
                "-i must always be defined. One of -j or -d must also always be defined as 1"
            )
            optParse(1)

        if args.reroot_opt not in [0, 1]:
            core.errorOut(2, "-t must take values of either 1 or 0")
            optParse(1)

        if args.reroot_opt == 1 and args.nj_outgroup is None:
            core.errorOut(
                3,
                "-When -t is set to 1, an outgroup must be specified with -o")
            optParse(1)

        # The range of -d was already validated above, so only 0 and 1 remain.
        if args.div_est_opt == 1:
            required = (args.r8s_output_file, args.num_sites, args.cal_specs,
                        args.cal_age)
            if any(opt is None for opt in required):
                core.errorOut(
                    5,
                    "You are missing one or more of the options for div time estimation with r8s. -e, -n, -s, and -a must all be defined"
                )
                optParse(1)

            # Split each list on its own separator. Splitting only when both
            # separators were present let mismatched input (e.g. two pairs
            # with a single age) through as two one-element lists.
            cal_specs = args.cal_specs.split(" ")
            cal_age = args.cal_age.split(",")
            if len(cal_specs) != len(cal_age):
                core.errorOut(
                    6,
                    "You must enter the same number of calibration nodes (-s) and calibration ages (-a)"
                )
                optParse(1)

        else:
            # Without divergence time estimation the r8s options are ignored.
            args.r8s_output_file = None
            args.num_sites = None
            cal_specs = None
            cal_age = None

        if args.log_opt not in [0, 1]:
            core.errorOut(7, "-l must take values of either 1 or 0")
            optParse(1)

        return args.input_file, args.r_output_file, args.nj_opt, args.nj_outgroup, args.reroot_opt, args.div_est_opt, args.r8s_output_file, args.num_sites, cal_specs, cal_age, args.log_opt, args.script_logdir, args.logdir_suffix

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 27
0
def optParse(errorflag):
	"""Parse and validate the command line options of the codeml wrapper.

	errorflag -- 0: parse and validate the options and return them.
	             1: print the usage message and exit (called after a
	                validation error has been reported).

	Returns the tuple (input, paml_path, tree_file, prune_opt, paml_seqtype,
	branch_site, anc_opt, verbosity, log_opt, logdir_suffix). tree_file is ""
	when -t was not given; logdir_suffix is None when -x was not given.

	Every validation failure is reported through core.errorOut() and followed
	by optParse(1), which prints the help text and calls sys.exit().
	"""

	parser = argparse.ArgumentParser(description="Runs codeml on a single .fa file or a directory full of .fa files. Dependencies: core, treeparse, PAML, newickutils")

	parser.add_argument("-i", dest="input", help="Input. Either a directory containing many FASTA files or a single FASTA file.")
	parser.add_argument("-c", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
	parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
	parser.add_argument("-p", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this to 1 to prune the tree for each file. Default: 0", type=int, default=0)
	parser.add_argument("-s", dest="paml_seqtype", help="The seqtype for codeml to use. 1 (default): Codons; 2: Amino Acids", type=int, default=1)
	parser.add_argument("-b", dest="branch_site", help="Specifies the type of run for PAML's branch site test. 0 (default): Do not do branch site test; 1: Do the null model of the branch site test (model=2, NSsite=2, fix_omega=1, omega=1); 2: Do the alternate model of the branch site test (model=2, NSsite=2, fix_omega=0, omega=1). A branch must be specified in your tree file.", type=int, default=0)
	parser.add_argument("-a", dest="anc_opt", help="Option to tell PAML to do ancestral reconstruction (1) or not (0). Default: 0.", type=int, default=0)
	parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
	parser.add_argument("-l", dest="log_opt", help="A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1", type=int, default=1)
	parser.add_argument("-x", dest="logdir_suffix", help="A string to add on to the end of the output directory.")

	args = parser.parse_args()

	if errorflag == 0:

		if args.input is None or args.paml_path is None:
			core.errorOut(1, "Both -i and -c must be set")
			optParse(1)

		# -t is optional (default ""), so only a tree file that was actually
		# given is checked. Testing the empty default with isfile() rejected
		# every run without -t and made the -p check below unreachable.
		if args.tree_file != "" and not os.path.isfile(args.tree_file):
			core.errorOut(2, "-t must be a valid tree file name")
			optParse(1)

		if args.prune_opt not in [0,1]:
			core.errorOut(3, "-p must take values of either 1 or 0")
			optParse(1)

		# Pruning needs a tree to prune.
		if args.prune_opt == 1 and args.tree_file == "":
			core.errorOut(4, "With -p set to 1 a tree file must be specified")
			optParse(1)

		if args.paml_seqtype not in [1,2]:
			core.errorOut(5, "-s must taked values of either 1 or 2")
			optParse(1)

		if args.branch_site not in [0,1,2]:
			core.errorOut(6, "-b must take values of 0, 1, or 2")
			optParse(1)

		if args.anc_opt not in [0,1]:
			core.errorOut(7, "-a must take values of 1 or 0")
			optParse(1)

		if args.verbosity not in [0,1]:
			core.errorOut(8, "-v must take values of either 1 or 0")
			optParse(1)

		if args.log_opt not in [0,1]:
			core.errorOut(9, "-l must take values of either 1 or 0")
			optParse(1)

		return args.input, args.paml_path, args.tree_file, args.prune_opt, args.paml_seqtype, args.branch_site, args.anc_opt, args.verbosity, args.log_opt, args.logdir_suffix

	elif errorflag == 1:
		parser.print_help()
		sys.exit()
Exemplo n.º 28
0
#def core.logCheck(lopt, lfilename, outline):
#	if lopt == 1:
#		core.printWrite(lfilename, outline);
#	else:
#		print outline;

############################################
#Main Block
############################################

# Read the validated command line options; optParse(0) returns them as a
# ten-element tuple.
ins, ppath, treefile, prune, seqtype, bsopt, aopt, v, l, outdir_suffix = optParse(0);

# Log timestamp; also passed to core.getOutdir() below.
starttime = core.getLogTime();

# -c has to be an existing directory; a valid path is given a trailing slash
# if it does not already end in one.
if not os.path.isdir(ppath):
	core.errorOut(10, "-c must be a valid directory path");
	optParse(1);
elif ppath[-1] != "/":
	ppath = ppath + "/";

# Single-file input: the output directories are derived from the directory
# that contains the input file.
if os.path.isfile(ins):
	fileflag = 1;
	indir = os.path.dirname(os.path.realpath(ins)) + "/";
	indir, script_outdir = core.getOutdir(indir, "run_codeml", starttime);
	# Plain concatenation: relies on script_outdir ending in "/".
	outdir = script_outdir + "codeml_out/";
	# The ancestral sequence directory is only defined when aopt is 1.
	if aopt == 1:
		ancdir = script_outdir + "anc_seqs_fa/";
	filelist = [ins];

else:
	fileflag = 0;
Exemplo n.º 29
0
def optParse(errorflag):
    """Parse and validate the command line options of the RAxML wrapper.

    errorflag -- 0: parse and validate the options and return them.
                 1: print the usage message and exit (called after a
                    validation error has been reported).

    Returns the tuple (input, raxml_path, raxml_model, bootstrap_reps,
    num_threads, verbosity, tree_combine, log_opt).

    Missing -i or -m prints the help text and exits. Every other validation
    failure is reported through core.errorOut() and followed by optParse(1),
    which prints the help text and calls sys.exit(). More than 100 bootstrap
    replicates only triggers a warning.
    """

    parser = argparse.ArgumentParser(
        description="Runs RAxML on a single .fa file or a directory full of .fa files. Dependencies: core, RAxML"
    )

    parser.add_argument(
        "-i", dest="input", help="Input. Either a directory containing many FASTA files or a single FASTA file."
    )
    parser.add_argument(
        "-r",
        dest="raxml_path",
        help="You can specify the full path to your RAxML executable here. Default: raxml (assumes you either have an alias or it is in your PATH.",
        default="raxml",
    )
    parser.add_argument("-m", dest="raxml_model", help="The DNA or AA model you wish RAxML to use.")
    parser.add_argument(
        "-b",
        dest="bootstrap_reps",
        help="The number of bootstrap replicates you wish RAxML to run with its rapid bootstrapping algorithm. Default: 0",
        type=int,
        default=0,
    )
    parser.add_argument(
        "-t",
        dest="num_threads",
        help="The number of threads you wish to use for the analysis. Default: 1",
        type=int,
        default=1,
    )
    parser.add_argument(
        "-v",
        dest="verbosity",
        help="An option to control the output printed to the screen. 1: print all RAxML output, 0: print only a progress bar. Default: 1",
        type=int,
        default=1,
    )
    parser.add_argument(
        "-c",
        dest="tree_combine",
        help="A boolean option to tell the script whether to create a file with a list of all the best trees (1) or not (0). Default: 1",
        type=int,
        default=1,
    )
    parser.add_argument(
        "-l",
        dest="log_opt",
        help="A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1",
        type=int,
        default=1,
    )

    args = parser.parse_args()

    if errorflag == 0:
        if args.input is None or args.raxml_model is None:
            parser.print_help()
            sys.exit()

        if args.bootstrap_reps < 0:
            core.errorOut(1, "-b can take only positive values")
            optParse(1)

        if args.bootstrap_reps > 100:
            # Single-argument print(...) behaves the same under Python 2 and 3.
            print(" ---------------------------------------------------------------------------------------------------")
            print("|*Warning: You have specified more than 100 bootstrap replicates. This could take a very long time. |")
            print(" ---------------------------------------------------------------------------------------------------")

        if args.num_threads <= 0:
            core.errorOut(2, "-t can take only positive, non-zero values")
            optParse(1)

        if args.verbosity not in [0, 1]:
            core.errorOut(3, "-v must take values of either 1 or 0")
            optParse(1)

        # tree_combine is set with -c; the message used to name -t, which is
        # the thread count option.
        if args.tree_combine not in [0, 1]:
            core.errorOut(4, "-c must take values of either 1 or 0")
            optParse(1)

        if args.log_opt not in [0, 1]:
            core.errorOut(5, "-l mus take values of either 1 or 0")
            optParse(1)

        return (
            args.input,
            args.raxml_path,
            args.raxml_model,
            args.bootstrap_reps,
            args.num_threads,
            args.verbosity,
            args.tree_combine,
            args.log_opt,
        )

    elif errorflag == 1:
        parser.print_help()
        sys.exit()
Exemplo n.º 30
0
# Read the validated command line options.
infilename, routfilename, njopt, outgroup, rr, d, r8soutfilename, numsites, calspec, calage, l, script_outdir_initial, outdir_suffix = optParse(0)

# Log timestamp; also passed to core.getOutdir() below.
starttime = core.getLogTime()

# Split the input into its directory (with trailing slash) and bare file name.
# A name without any "/" is taken relative to the current working directory.
if infilename.find("/") != -1:
	indir = os.path.dirname(os.path.realpath(infilename)) + "/"
	infilename = infilename[infilename.rfind("/")+1:]
else:
	indir = os.getcwd() + "/"

indir, script_outdir = core.getOutdir(indir, "supertreemaker", starttime)
print(script_outdir)
print(os.path.basename(os.path.normpath(script_outdir)))
# -z relocates the output directory: only its final path component is kept
# and placed under the user supplied directory.
if script_outdir_initial is not None:
	if not os.path.isdir(script_outdir_initial):
		core.errorOut(8, "-z must be a valid directory")
		optParse(1)

	script_outdir = os.path.join(script_outdir_initial, os.path.basename(os.path.normpath(script_outdir)))
# -x appends "-<suffix>" to the directory name (before the trailing slash).
if outdir_suffix is not None:
	if script_outdir.endswith("/"):
		script_outdir = script_outdir[:-1]
	script_outdir = script_outdir + "-" + outdir_suffix + "/"

# os.path.join() above drops the trailing slash, so -z without -x used to
# yield a path without one, which broke the log file path built below.
# Normalise so the directory name always ends in a slash.
if not script_outdir.endswith("/"):
	script_outdir = script_outdir + "/"

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory directly instead of through a shell command string:
# a quote in the user supplied path would otherwise break (or inject into)
# the "mkdir '...'" command. An already existing directory is left alone.
if not os.path.isdir(script_outdir):
	os.mkdir(script_outdir)

# Create (or truncate) the log file; the handle stays open for later use.
logfilename = os.path.join(script_outdir, "supertreemaker.log")
logfile = open(logfilename, "w")
logfile.write("")
Exemplo n.º 31
0
def optParse(errorflag):
	"""Parse and validate the command line options of the supertree script.

	errorflag -- 0: parse and validate the options and return them.
	             1: print the usage message and exit (called after a
	                validation error has been reported).

	Returns the tuple (input_file, r_output_file, nj_opt, nj_outgroup,
	reroot_opt, div_est_opt, r8s_output_file, num_sites, cal_specs, cal_age,
	log_opt, script_logdir, logdir_suffix). cal_specs and cal_age are lists of
	strings when -d is 1 and None otherwise; r8s_output_file and num_sites are
	also forced to None when -d is 0.

	Every validation failure is reported through core.errorOut() and followed
	by optParse(1), which prints the help text and calls sys.exit().
	"""

	parser = argparse.ArgumentParser()

	parser.add_argument("-i", dest="input_file", help="A file containing a list of trees on which to run SDM and NJ OR a file containing a single tree on which to run r8s.")
	parser.add_argument("-r", dest="r_output_file", help="A file name for R to write the Neighbor Joining tree.")

	parser.add_argument("-j", dest="nj_opt", help="A boolean option to use SDM to create a consensus matrix and R to create a NJ tree. Default: 0.", type=int, default=0)
	parser.add_argument("-o", dest="nj_outgroup", help="The outgroup by which the NJ tree will be rooted.")
	parser.add_argument("-t", dest="reroot_opt", help="Boolean to reroot (1) the NJ tree or not (0). If set to 1, -o must also be specified. Default: 0", type=int, default=0)

	parser.add_argument("-d", dest="div_est_opt", help="A boolean option to estimate divergence times from the NJ tree with r8s (1) or not (0). Default: 0.", type=int, default=0)
	parser.add_argument("-e", dest="r8s_output_file", help="A file name for r8s to write the final output.")
	parser.add_argument("-n", dest="num_sites", help="The total number of sites from the alignments used to make the tree; used by r8s.")
	parser.add_argument("-s", dest="cal_specs", help="A list of PAIRS of species that define nodes you wish to constrain times on. Species within a pair should be separated by a comma, pairs should be separated by a space (eg 'pair1s1,pair1s2 pair2s1,pair2s2').")
	parser.add_argument("-a", dest="cal_age", help="The calibration ages of the nodes defined by the species in -s. The order of this list corresponds to the order of -s. Separate ages by commas. If constraints are to be used the keywords min and/or max are used with hyphens (eg '324,min-99.9-max-121' defines one fixed age of 324 and one constrained age).")
	parser.add_argument("-l", dest="log_opt", help="A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1", type=int, default=1)

	parser.add_argument("-z", dest="script_logdir", help="A directory in which to place the script output directory. If none is specified, this will default to the directory of the input file")
	parser.add_argument("-x", dest="logdir_suffix", help="A string to add on to the end of the output directory.")

	args = parser.parse_args()

	if errorflag == 0:
		# -i is mandatory and both mode switches must be 0 or 1.
		if args.input_file is None or args.nj_opt not in [0,1] or args.div_est_opt not in [0,1]:
			core.errorOut(1, "-i must always be defined. One of -j or -d must also always be defined as 1")
			optParse(1)

		if args.reroot_opt not in [0,1]:
			core.errorOut(2, "-t must take values of either 1 or 0")
			optParse(1)

		if args.reroot_opt == 1 and args.nj_outgroup is None:
			core.errorOut(3, "-When -t is set to 1, an outgroup must be specified with -o")
			optParse(1)

		# The range of -d was already validated above, so only 0 and 1 remain.
		if args.div_est_opt == 1:
			required = (args.r8s_output_file, args.num_sites, args.cal_specs, args.cal_age)
			if any(opt is None for opt in required):
				core.errorOut(5, "You are missing one or more of the options for div time estimation with r8s. -e, -n, -s, and -a must all be defined")
				optParse(1)

			# Split each list on its own separator. Splitting only when both
			# separators were present let mismatched input (e.g. two pairs
			# with a single age) through as two one-element lists.
			cal_specs = args.cal_specs.split(" ")
			cal_age = args.cal_age.split(",")
			if len(cal_specs) != len(cal_age):
				core.errorOut(6, "You must enter the same number of calibration nodes (-s) and calibration ages (-a)")
				optParse(1)

		else:
			# Without divergence time estimation the r8s options are ignored.
			args.r8s_output_file = None
			args.num_sites = None
			cal_specs = None
			cal_age = None

		if args.log_opt not in [0,1]:
			core.errorOut(7, "-l must take values of either 1 or 0")
			optParse(1)

		return args.input_file, args.r_output_file, args.nj_opt, args.nj_outgroup, args.reroot_opt, args.div_est_opt, args.r8s_output_file, args.num_sites, cal_specs, cal_age, args.log_opt, args.script_logdir, args.logdir_suffix

	elif errorflag == 1:
		parser.print_help()
		sys.exit()
Exemplo n.º 32
0
def optParse(errorflag):
    """Parse and validate the command line options for the GBlocks wrapper.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the usage
            message and exit without validating anything.

    Returns:
        The tuple (input, gblocks_path, seq_type, run_mode, verbosity,
        log_opt), where seq_type is reduced to its lowercase one-letter code
        ("p", "d" or "c"). Returns None if errorflag is neither 0 nor 1.

    Raises:
        SystemExit: after printing the usage message, when -i is missing,
            when an option holds an unsupported value, or when errorflag
            is 1.
    """
    parser = argparse.ArgumentParser(
        description=
        "Runs GBlocks on a single .fa file or a directory full of .fa files. Dependencies: core, GBlocks"
    )

    parser.add_argument(
        "-i",
        dest="input",
        help=
        "Input. Either a directory containing many FASTA files or a single FASTA file."
    )
    parser.add_argument(
        "-r",
        dest="gblocks_path",
        help=
        "You can specify the full path to your GBlocks executable here. Default: gblocks (assumes you either have an alias or it is in your PATH.",
        default="gblocks")
    parser.add_argument(
        "-t",
        dest="seq_type",
        help="Choose from: protein (p, default), dna, (d), or codon (c).",
        default="p")
    parser.add_argument(
        "-m",
        dest="run_mode",
        help=
        "Run mode. 1: for phylogenetic reconstructions, accepts only masks that are < 20 percent of original file. 2: Conservative, default GBlocks settings. Default: 1",
        type=int,
        default=1)
    parser.add_argument(
        "-v",
        dest="verbosity",
        help=
        "An option to control the output printed to the screen. 1: print all GBlocks output, 0: print only a progress bar. Default: 1",
        type=int,
        default=1)
    parser.add_argument(
        "-l",
        dest="log_opt",
        help=
        "A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1",
        type=int,
        default=1)

    args = parser.parse_args()

    def exit_with_help():
        # Print the usage text and stop. Reusing the parser built above
        # avoids re-entering optParse and parsing the command line a second
        # time just to show the help.
        parser.print_help()
        sys.exit()

    if errorflag == 0:
        # The input is the only option without a default.
        if args.input is None:
            exit_with_help()

        seq_type = args.seq_type.lower()
        if seq_type not in ["p", "d", "c", "protein", "dna", "codon"]:
            core.errorOut(1, "-t must take values of p, d, or c")
            exit_with_help()

        # Long names ("protein", "dna", "codon") collapse to their first
        # letter, which is the short code for the same sequence type.
        if len(seq_type) > 1:
            seq_type = seq_type[0]

        if args.run_mode not in [1, 2]:
            core.errorOut(2, "-m must take values of either 1 or 2")
            exit_with_help()

        if args.verbosity not in [0, 1]:
            core.errorOut(3, "-v must take values of either 1 or 0")
            exit_with_help()

        if args.log_opt not in [0, 1]:
            core.errorOut(4, "-l must take values of either 1 or 0")
            exit_with_help()

        return args.input, args.gblocks_path, seq_type, args.run_mode, args.verbosity, args.log_opt

    elif errorflag == 1:
        exit_with_help()
Exemplo n.º 33
0
############################################################

core.runTime("# Count read files")

parser = argparse.ArgumentParser(description="Count read files by species")
parser.add_argument(
    "-i",
    dest="input",
    help="A directory structured as <genus-species>/<run-type>/<fastq files>",
    default=False)
args = parser.parse_args()
# Input options.

seq_run_ids, spec_ids, specs_ordered, spec_abbr, basedirs = globs.get()
# -i defaults to False when omitted. Test for that first so the bool is never
# handed to os.path.isdir(): a bool is an int, and the os functions treat an
# int as a file descriptor (False == 0) rather than as a path.
if not args.input or not os.path.isdir(args.input):
    sys.exit(core.errorOut("RC1",
                           "Cannot find specified input directory (-i)"))

# Every entry of the input directory gets a two-element counter list; slot 0
# is the one incremented for run directories whose name contains "single".
specdirs = os.listdir(args.input)
spec_counts = {s: [0, 0]
               for s in specdirs}

for spec in specdirs:
    count = 0
    specdir = os.path.join(args.input, spec)
    rundirs = os.listdir(specdir)
    for run in rundirs:
        rundir = os.path.join(specdir, run)
        readfiles = os.listdir(rundir)

        if "single" in run:
            spec_counts[spec][0] += len(readfiles)