def _main():
    """Download, unpack and compile the latest external-tools bundle.

    Parses the command line (-v, --debug, --dir, --cpu, positional
    targets), chooses a target directory (prompting interactively while
    the given one does not exist; an empty answer selects the ETE home
    dir), downloads the ``latest.tar.gz`` ext_apps archive from GitHub,
    extracts it and runs the bundled ``compile_all`` script.

    Returns:
        The status value returned by ``compile_all.compile_all``.
    """
    parser = ArgumentParser()
    parser.add_argument("-v", dest="verbose", action="store_true")
    parser.add_argument("--debug", dest="debug", action="store_true")
    parser.add_argument("--dir", dest='directory', type=str, default='')
    parser.add_argument("--cpu", dest='cores', type=int, default=1)
    parser.add_argument(dest="targets", nargs="*")
    args = parser.parse_args()

    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if pexist(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # if a copy of apps is part of the ete distro, use if by default
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # if not, try a user local copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    # Keep asking until an existing directory is provided; an empty
    # answer falls back to the ETE home dir (created below if missing).
    TARGET_DIR = args.directory
    while not pexist(TARGET_DIR):
        TARGET_DIR = input('target directory? [%s]:' % ETEHOMEDIR).strip()
        if TARGET_DIR == '':
            TARGET_DIR = ETEHOMEDIR
            break

    if TARGET_DIR == ETEHOMEDIR:
        try:
            os.mkdir(ETEHOMEDIR)
        except OSError:
            # best effort: the directory may already exist; a truly
            # unwritable location will make the download below fail anyway
            pass

    version_file = "latest.tar.gz"
    print(colorify('Downloading latest version of tools...', "green"),
          file=sys.stderr)
    sys.stderr.flush()
    urlretrieve(
        "https://github.com/jhcepas/ext_apps/archive/%s" % version_file,
        pjoin(TARGET_DIR, version_file))

    print(colorify('Decompressing...', "green"), file=sys.stderr)
    # FIX: use a context manager so the tar archive is closed even if
    # extraction fails (the original leaked the open file handle).
    with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
        tfile.extractall(TARGET_DIR)

    print(colorify('Compiling tools...', "green"), file=sys.stderr)
    # Make the freshly extracted bundle importable, then run its compiler.
    sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
    import compile_all
    s = compile_all.compile_all(targets=args.targets, verbose=args.verbose,
                                cores=args.cores)
    return s
def _main():
    """Download, unpack and compile the latest external-tools bundle.

    Duplicate variant of the installer entry point: parses the command
    line (-v, --debug, --dir, --cpu, positional targets), resolves the
    target directory (interactive prompt while it does not exist; empty
    answer selects the ETE home dir), fetches the ``latest.tar.gz``
    ext_apps archive from GitHub, extracts it and runs the bundled
    ``compile_all`` script.

    Returns:
        The status value returned by ``compile_all.compile_all``.
    """
    parser = ArgumentParser()
    parser.add_argument("-v", dest="verbose", action="store_true")
    parser.add_argument("--debug", dest="debug", action="store_true")
    parser.add_argument("--dir", dest='directory', type=str, default='')
    parser.add_argument("--cpu", dest='cores', type=int, default=1)
    parser.add_argument(dest="targets", nargs="*")
    args = parser.parse_args()

    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if pexist(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # if a copy of apps is part of the ete distro, use if by default
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # if not, try a user local copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    # Prompt until an existing directory is given; empty input selects
    # the ETE home dir (created just below if needed).
    TARGET_DIR = args.directory
    while not pexist(TARGET_DIR):
        TARGET_DIR = input('target directory? [%s]:' % ETEHOMEDIR).strip()
        if TARGET_DIR == '':
            TARGET_DIR = ETEHOMEDIR
            break

    if TARGET_DIR == ETEHOMEDIR:
        try:
            os.mkdir(ETEHOMEDIR)
        except OSError:
            # dir probably exists already; other failures surface later
            pass

    version_file = "latest.tar.gz"
    print(colorify('Downloading latest version of tools...', "green"),
          file=sys.stderr)
    sys.stderr.flush()
    urlretrieve(
        "https://github.com/jhcepas/ext_apps/archive/%s" % version_file,
        pjoin(TARGET_DIR, version_file))

    print(colorify('Decompressing...', "green"), file=sys.stderr)
    # FIX: close the tar archive deterministically (the original never
    # called close(), leaking the file handle on every run).
    with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
        tfile.extractall(TARGET_DIR)

    print(colorify('Compiling tools...', "green"), file=sys.stderr)
    sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
    import compile_all
    s = compile_all.compile_all(targets=args.targets, verbose=args.verbose,
                                cores=args.cores)
    return s
def _main():
    """Command-line entry point for the build tool (reads ``sys.argv``).

    Dispatches the utility subcommands (install_tools, check,
    workflows/wl, apps, show, dump, validate, version), each of which
    exits the process; otherwise builds the full argparse parser,
    validates inputs, populates ``GLOBALS`` and starts the pipeline via
    ``app_wrapper(main, args)``.
    """
    global BASEPATH, APPSPATH, args

    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if os.path.exists(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # if a copy of apps is part of the ete distro, use if by default
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # if not, try a user local copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    if len(sys.argv) == 1:
        if not pexist(APPSPATH):
            print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
            print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)
    elif len(sys.argv) > 1:
        _config_path = pjoin(BASEPATH, 'phylobuild.cfg')

        if sys.argv[1] == "install_tools":
            # FIX: Python 3 moved urlretrieve into urllib.request; the
            # original "import urllib; urllib.urlretrieve(...)" raised
            # AttributeError at runtime.
            from urllib.request import urlretrieve
            import tarfile
            print(colorify('Downloading latest version of tools...', "green"), file=sys.stderr)
            if len(sys.argv) > 2:
                TARGET_DIR = sys.argv[2]
            else:
                TARGET_DIR = ''
            # Prompt until an existing directory is supplied; empty input
            # selects the ETE home dir.
            while not pexist(TARGET_DIR):
                TARGET_DIR = input('target directory? [%s]:' %ETEHOMEDIR).strip()
                if TARGET_DIR == '':
                    TARGET_DIR = ETEHOMEDIR
                    break
            if TARGET_DIR == ETEHOMEDIR:
                try:
                    os.mkdir(ETEHOMEDIR)
                except OSError:
                    # best effort: the directory may already exist
                    pass
            version_file = "latest.tar.gz"
            urlretrieve("https://github.com/jhcepas/ext_apps/archive/%s" %version_file,
                        pjoin(TARGET_DIR, version_file))
            print(colorify('Decompressing...', "green"), file=sys.stderr)
            # FIX: context manager closes the archive even on failure
            # (the original leaked the open tarfile handle).
            with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
                tfile.extractall(TARGET_DIR)
            print(colorify('Compiling tools...', "green"), file=sys.stderr)
            sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
            import compile_all
            s = compile_all.compile_all()
            sys.exit(s)

        elif sys.argv[1] == "check":
            if not pexist(APPSPATH):
                print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
                print(colorify('Use "ete build install_tools" to install or upgrade', "orange"), file=sys.stderr)
            # setup portable apps
            config = {}
            for k in apps.builtin_apps:
                cmd = apps.get_call(k, APPSPATH, "/tmp", "1")
                config[k] = cmd
            apps.test_apps(config)
            sys.exit(0)

        elif sys.argv[1] in ("workflows", "wl"):
            if sys.argv[1] == "wl":
                print(colorify("WARNING: 'wl' is obsolete and will be removed in the future, use 'workflows' instead", "orange"), file=sys.stderr)
            base_config = check_config(_config_path)
            list_workflows(base_config)
            sys.exit(0)

        elif sys.argv[1] == "apps":
            base_config = check_config(_config_path)
            list_apps(base_config, set(sys.argv[2:]))
            sys.exit(0)

        elif sys.argv[1] == "show":
            base_config = check_config(_config_path)
            try:
                block = sys.argv[2]
            except IndexError:
                print("Expected a block name, found none")
                sys.exit(1)
            block_detail(block, base_config)
            sys.exit(0)

        elif sys.argv[1] == "dump":
            if len(sys.argv) > 2:
                base_config = check_config(_config_path)
                block_detail(sys.argv[2], base_config, color=False)
            else:
                print(open(_config_path).read())
            sys.exit(0)

        elif sys.argv[1] == "validate":
            print('Validating configuration file ', sys.argv[2])
            if pexist(sys.argv[2]):
                base_config = check_config(sys.argv[2])
                print('Everything ok')
            else:
                print('File does not exist')
                sys.exit(-1)
            sys.exit(0)

        elif sys.argv[1] == "version":
            print(__VERSION__, '(%s)' %__DATE__)
            sys.exit(0)

    parser = argparse.ArgumentParser(description=__DESCRIPTION__ + __EXAMPLES__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Input data related flags
    input_group = parser.add_argument_group('==== Input Options ====')
    input_group.add_argument('[check | workflows | apps | show | dump | validate | version | install_tools]',
                             nargs='?',
                             help=("Utility commands:\n"
                                   "check: check that external applications are executable.\n"
                                   "wl: show a list of available workflows.\n"
                                   "show [name]: show the configuration parameters of a given workflow or application config block.\n"
                                   "dump [name]: dump the configuration parameters of the specified block (allows to modify predefined config).\n"
                                   "validate [configfile]: Validate a custom configuration file.\n"
                                   "version: Show current version.\n"
                                   ))
    input_group.add_argument("-c", "--config", dest="configfile",
                             type=is_file, default=BASEPATH+'/phylobuild.cfg',
                             help="Custom configuration file.")
    input_group.add_argument("--tools-dir", dest="tools_dir",
                             type=str,
                             help="Custom path where external software is avaiable.")
    input_group.add_argument("-w", dest="workflow",
                             required=True, nargs='+',
                             help="One or more gene-tree workflow names. All the specified workflows will be executed using the same input data.")
    input_group.add_argument("-m", dest="supermatrix_workflow",
                             required=False, nargs='+',
                             help="One or more super-matrix workflow names. All the specified workflows will be executed using the same input data.")
    input_group.add_argument("-a", dest="aa_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " protein sequences.")
    input_group.add_argument("-n", dest="nt_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " nucleotide sequences")
    # input_group.add_argument("--seqformat", dest="seqformat",
    #                          choices=["fasta", "phylip", "iphylip", "phylip_relaxed", "iphylip_relaxed"],
    #                          default="fasta",
    #                          help="")
    input_group.add_argument("--dealign", dest="dealign",
                             action="store_true",
                             help="when used, gaps in the orginal fasta file will"
                             " be removed, thus allowing to use alignment files as input.")
    input_group.add_argument("--seq-name-parser", dest="seq_name_parser",
                             type=str,
                             help=("A Perl regular expression containing a matching group, which is"
                                   " used to parse sequence names from the input files. Use this option to"
                                   " customize the names that should be shown in the output files."
                                   " The matching group (the two parentheses) in the provided regular"
                                   " expression will be assumed as sequence name. By default, all "
                                   " characthers until the first blank space or tab delimiter are "
                                   " used as the sequence names."),
                             # FIX: raw string avoids the invalid "\s"
                             # escape warning; the value is unchanged.
                             default=r'^([^\s]+)')
    input_group.add_argument("--no-seq-rename", dest="seq_rename",
                             action="store_false",
                             help="If used, sequence names will NOT be"
                             " internally translated to 10-character-"
                             "identifiers.")
    input_group.add_argument("--no-seq-checks", dest="no_seq_checks",
                             action="store_true",
                             help="Skip consistency sequence checks for not allowed symbols, etc.")
    input_group.add_argument("--no-seq-correct", dest="no_seq_correct",
                             action="store_true",
                             help="Skip sequence compatibility changes: i.e. U, J and O symbols are converted into X by default.")

    dup_names_group = input_group.add_mutually_exclusive_group()
    dup_names_group.add_argument("--ignore-dup-seqnames", dest="ignore_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, a single random instance will be used."))
    dup_names_group.add_argument("--rename-dup-seqnames", dest="rename_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, duplicates will be renamed."))

    input_group.add_argument("--seqdb", dest="seqdb",
                             type=str,
                             help="Uses a custom sequence database file")

    # supermatrix workflow
    input_group.add_argument("--cogs", dest="cogs_file",
                             type=is_file,
                             help="A file defining clusters of orthologous groups."
                             " One per line. Tab delimited sequence ids. ")
    input_group.add_argument("--lineages", dest="lineages_file",
                             type=is_file,
                             help="A file containing the (sorted) lineage "
                             "track of each species. It enables "
                             "NPR algorithm to fix what taxonomic "
                             "levels should be optimized."
                             "Note that linage tracks must consist in "
                             "a comma separated list of taxonomic levels "
                             "sorted from deeper to swallower clades "
                             "(i.e. 9606 [TAB] Eukaryotes,Mammals,Primates)")
    input_group.add_argument("--spname-delimiter", dest="spname_delimiter",
                             type=str, default="_",
                             help="spname_delimiter is used to split"
                             " the name of sequences into species code and"
                             " sequence identifier (i.e. HUMAN_p53 = HUMAN, p53)."
                             " Note that species name must always precede seq.identifier.")
    input_group.add_argument("--spfile", dest="spfile",
                             type=is_file,
                             help="If specified, only the sequences and ortholog"
                             " pairs matching the group of species in this file"
                             " (one species code per line) will be used. ")

    npr_group = parser.add_argument_group('==== NPR options ====')
    npr_group.add_argument("-r", "--recursive", dest="npr_workflows",
                           required=False, nargs="*",
                           help="Enables recursive NPR capabilities (Nested Phylogenetic Reconstruction)"
                           " and specifies custom workflows and filters for each NPR iteration.")
    npr_group.add_argument("--nt_switch_thr", dest="nt_switch_thr",
                           required=False, type=float, default = 0.95,
                           help="Sequence similarity at which nucleotide based alignments should be used"
                           " instead of amino-acids. ")
    npr_group.add_argument("--max-iters", dest="max_iters",
                           required=False, type=int, default=99999999,
                           help="Set a maximum number of NPR iterations allowed.")
    npr_group.add_argument("--first-split-outgroup", dest="first_split",
                           type=str, default='midpoint',
                           help=("When used, it overrides first_split option"
                                 " in any tree merger config block in the"
                                 " config file. Default: 'midpoint' "))

    # Output data related flags
    output_group = parser.add_argument_group('==== Output Options ====')
    output_group.add_argument("-o", "--outdir", dest="outdir",
                              type=str, required=True,
                              help="""Output directory for results.""")
    output_group.add_argument("--scratch-dir", dest="scratch_dir",
                              type=is_dir,
                              help="""If provided, ete-build will run on the scratch folder and all files will be transferred to the output dir when finished. """)
    output_group.add_argument("--db-dir", dest="db_dir",
                              type=is_dir,
                              help="""Alternative location of the database directory""")
    output_group.add_argument("--tasks-dir", dest="tasks_dir",
                              type=is_dir,
                              help="""Output directory for the executed processes (intermediate files).""")
    output_group.add_argument("--compress", action="store_true",
                              help="Compress all intermediate files when"
                              " a workflow is finished.")
    output_group.add_argument("--logfile", action="store_true",
                              help="Log messages will be saved into a file named npr.log within the output directory.")
    output_group.add_argument("--noimg", action="store_true",
                              help="Tree images will not be generated when a workflow is finished.")
    output_group.add_argument("--email", dest="email",
                              type=str,
                              help="Send an email when errors occur or a workflow is done.")
    output_group.add_argument("--email_report_time", dest="email_report_time",
                              type=int, default = 0,
                              help="How often (in minutes) an email reporting the status of the execution should be sent. 0=No reports")

    # Task execution related flags
    exec_group = parser.add_argument_group('==== Execution Mode Options ====')
    exec_group.add_argument("-C", "--cpu", dest="maxcores", type=int,
                            default=1, help="Maximum number of CPU cores"
                            " available in the execution host. If higher"
                            " than 1, tasks with multi-threading"
                            " capabilities will enabled. Note that this"
                            " number will work as a hard limit for all applications,"
                            "regardless of their specific configuration.")
    exec_group.add_argument("-t", "--schedule-time", dest="schedule_time",
                            type=float, default=2,
                            help="""How often (in secs) tasks should be checked for available results.""")
    exec_group.add_argument("--launch-time", dest="launch_time",
                            type=float, default=3,
                            help="""How often (in secs) queued jobs should be checked for launching""")

    exec_type_group = exec_group.add_mutually_exclusive_group()
    exec_type_group.add_argument("--noexec", dest="no_execute",
                                 action="store_true",
                                 help=("Prevents launching any external application."
                                       " Tasks will be processed and intermediate steps will"
                                       " run, but no real computation will be performed."))
    exec_type_group.add_argument("--sge", dest="sge_execute",
                                 action="store_true", help="EXPERIMENTAL!: Jobs will be"
                                 " launched using the Sun Grid Engine"
                                 " queue system.")
    exec_group.add_argument("--monitor", dest="monitor",
                            action="store_true",
                            help="Monitor mode: pipeline jobs will be"
                            " detached from the main process. This means that"
                            " when npr execution is interrupted, all currently"
                            " running jobs will keep running. Use this option if you"
                            " want to stop and recover an execution thread or"
                            " if jobs are expected to be executed remotely."
                            )
    exec_group.add_argument("--override", dest="override",
                            action="store_true",
                            help="Override workflow configuration file if a previous version exists."
                            )
    exec_group.add_argument("--clearall", dest="clearall",
                            action="store_true",
                            help="Erase all previous data in the output directory and start a clean execution.")
    exec_group.add_argument("--softclear", dest="softclear",
                            action="store_true",
                            help="Clear all precomputed data (data.db), but keeps task raw data in the directory, so they can be re-processed.")
    exec_group.add_argument("--clear-seqdb", dest="clearseqs",
                            action="store_true",
                            help="Reload sequences deleting previous database if necessary.")
    # exec_group.add_argument("--arch", dest="arch",
    #                         choices=["auto", "32", "64"],
    #                         default="auto", help="Set the architecture of"
    #                         " execution hosts (needed only when using"
    #                         " built-in applications.)")
    exec_group.add_argument("--nochecks", dest="nochecks",
                            action="store_true",
                            help="Skip basic checks (i.e. tools available) everytime the application starts.")

    # Interface related flags
    ui_group = parser.add_argument_group("==== Program Interface Options ====")
    # ui_group.add_argument("-u", dest="enable_ui",
    #                       action="store_true", help="When used, a color"
    #                       " based interface is launched to monitor NPR"
    #                       " processes. This feature is EXPERIMENTAL and"
    #                       " requires NCURSES libraries installed in your"
    #                       " system.")
    ui_group.add_argument("-v", dest="verbosity",
                          default=0, type=int, choices=[0,1,2,3,4],
                          help="Verbosity level: 0=very quiet, 4=very "
                          " verbose.")
    ui_group.add_argument("--debug", nargs="?", const="all",
                          help="Start debugging"
                          " A taskid can be provided, so"
                          " debugging will start from such task on.")

    args = parser.parse_args()

    if args.tools_dir:
        APPSPATH = args.tools_dir

    if not pexist(APPSPATH):
        print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
        print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)

    args.enable_ui = False
    if not args.noimg:
        print('Testing ETE-build graphics support...')
        print('X11 DISPLAY = %s' %colorify(os.environ.get('DISPLAY', 'not detected!'), 'yellow'))
        print('(You can use --noimg to disable graphical capabilities)')
        try:
            from .. import Tree
            Tree().render('/tmp/etenpr_img_test.png')
        # FIX: narrowed from a bare "except:" so KeyboardInterrupt /
        # SystemExit are not swallowed and re-labelled as a config error.
        except Exception:
            raise ConfigError('img generation not supported')

    if not args.aa_seed_file and not args.nt_seed_file:
        parser.error('At least one input file argument (-a, -n) is required')

    outdir = os.path.abspath(args.outdir)
    # reject paths with a trailing separator (empty run name)
    final_dir, runpath = os.path.split(outdir)
    if not runpath:
        raise ValueError("Invalid outdir")

    GLOBALS["output_dir"] = os.path.abspath(args.outdir)

    if args.scratch_dir:
        # set paths for scratch folder for sqlite files
        print("Creating temporary scratch dir...", file=sys.stderr)
        base_scratch_dir = os.path.abspath(args.scratch_dir)
        scratch_dir = tempfile.mkdtemp(prefix='npr_tmp', dir=base_scratch_dir)
        GLOBALS["scratch_dir"] = scratch_dir
        GLOBALS["basedir"] = scratch_dir
    else:
        GLOBALS["basedir"] = GLOBALS["output_dir"]

    GLOBALS["first_split_outgroup"] = args.first_split
    GLOBALS["email"] = args.email
    GLOBALS["verbosity"] = args.verbosity
    GLOBALS["email_report_time"] = args.email_report_time * 60
    GLOBALS["launch_time"] = args.launch_time
    GLOBALS["cmdline"] = ' '.join(sys.argv)
    GLOBALS["threadinfo"] = defaultdict(dict)
    GLOBALS["seqtypes"] = set()
    GLOBALS["target_species"] = set()
    GLOBALS["target_sequences"] = set()
    GLOBALS["spname_delimiter"] = args.spname_delimiter
    GLOBALS["color_shell"] = True
    GLOBALS["citator"] = Citator()
    GLOBALS["lineages"] = None
    GLOBALS["cogs_file"] = None

    GLOBALS["citator"].add("ETE")

    if not pexist(GLOBALS["basedir"]):
        os.makedirs(GLOBALS["basedir"])

    # when killed, translate signal into exception so program can exit cleanly
    def raise_control_c(_signal, _frame):
        if GLOBALS.get('_background_scheduler', None):
            GLOBALS['_background_scheduler'].terminate()
        raise KeyboardInterrupt
    signal.signal(signal.SIGTERM, raise_control_c)

    # Start the application
    app_wrapper(main, args)
def _main(arguments):
    """Command-line entry point for the build tool (parameterized).

    Same flow as the ``sys.argv`` variant but driven by the *arguments*
    list passed by the caller: dispatches the utility subcommands
    (install_tools, check, workflows/wl, apps, show, dump, validate,
    version), each exiting the process; otherwise parses the full
    argument set, fills ``GLOBALS`` and starts the pipeline through
    ``app_wrapper(main, args)``.
    """
    global BASEPATH, APPSPATH, args

    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if os.path.exists(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # if a copy of apps is part of the ete distro, use if by default
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # if not, try a user local copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    if len(arguments) == 1:
        if not pexist(APPSPATH):
            print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
            print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)
    elif len(arguments) > 1:
        _config_path = pjoin(BASEPATH, 'phylobuild.cfg')

        if arguments[1] == "install_tools":
            # FIX: Python 3 moved urlretrieve into urllib.request; the
            # original "import urllib; urllib.urlretrieve(...)" raised
            # AttributeError at runtime.
            from urllib.request import urlretrieve
            import tarfile
            print(colorify('Downloading latest version of tools...', "green"), file=sys.stderr)
            if len(arguments) > 2:
                TARGET_DIR = arguments[2]
            else:
                TARGET_DIR = ''
            # Prompt until an existing directory is supplied; empty input
            # selects the ETE home dir.
            while not pexist(TARGET_DIR):
                TARGET_DIR = input('target directory? [%s]:' %ETEHOMEDIR).strip()
                if TARGET_DIR == '':
                    TARGET_DIR = ETEHOMEDIR
                    break
            if TARGET_DIR == ETEHOMEDIR:
                try:
                    os.mkdir(ETEHOMEDIR)
                except OSError:
                    # best effort: the directory may already exist
                    pass
            version_file = "latest.tar.gz"
            urlretrieve("https://github.com/jhcepas/ext_apps/archive/%s" %version_file,
                        pjoin(TARGET_DIR, version_file))
            print(colorify('Decompressing...', "green"), file=sys.stderr)
            # FIX: context manager closes the archive even on failure
            # (the original leaked the open tarfile handle).
            with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
                tfile.extractall(TARGET_DIR)
            print(colorify('Compiling tools...', "green"), file=sys.stderr)
            sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
            import compile_all
            s = compile_all.compile_all()
            sys.exit(s)

        elif arguments[1] == "check":
            if not pexist(APPSPATH):
                print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
                print(colorify('Use "ete build install_tools" to install or upgrade', "orange"), file=sys.stderr)
            # setup portable apps
            config = {}
            for k in apps.builtin_apps:
                cmd = apps.get_call(k, APPSPATH, "/tmp", "1")
                config[k] = cmd
            apps.test_apps(config)
            sys.exit(0)

        elif arguments[1] in ("workflows", "wl"):
            if arguments[1] == "wl":
                print(colorify("WARNING: 'wl' is obsolete and will be removed in the future, use 'workflows' instead", "orange"), file=sys.stderr)
            base_config = check_config(_config_path)
            list_workflows(base_config)
            sys.exit(0)

        elif arguments[1] == "apps":
            base_config = check_config(_config_path)
            list_apps(base_config, set(arguments[2:]))
            sys.exit(0)

        elif arguments[1] == "show":
            base_config = check_config(_config_path)
            try:
                block = arguments[2]
            except IndexError:
                print("Expected a block name, found none")
                sys.exit(1)
            block_detail(block, base_config)
            sys.exit(0)

        elif arguments[1] == "dump":
            if len(arguments) > 2:
                base_config = check_config(_config_path)
                block_detail(arguments[2], base_config, color=False)
            else:
                print(open(_config_path).read())
            sys.exit(0)

        elif arguments[1] == "validate":
            print('Validating configuration file ', arguments[2])
            if pexist(arguments[2]):
                base_config = check_config(arguments[2])
                print('Everything ok')
            else:
                print('File does not exist')
                sys.exit(-1)
            sys.exit(0)

        elif arguments[1] == "version":
            print(__VERSION__, '(%s)' %__DATE__)
            sys.exit(0)

    parser = argparse.ArgumentParser(description=__DESCRIPTION__ + __EXAMPLES__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Input data related flags
    input_group = parser.add_argument_group('==== Input Options ====')
    input_group.add_argument('[check | workflows | apps | show | dump | validate | version | install_tools]',
                             nargs='?',
                             help=("Utility commands:\n"
                                   "check: check that external applications are executable.\n"
                                   "wl: show a list of available workflows.\n"
                                   "show [name]: show the configuration parameters of a given workflow or application config block.\n"
                                   "dump [name]: dump the configuration parameters of the specified block (allows to modify predefined config).\n"
                                   "validate [configfile]: Validate a custom configuration file.\n"
                                   "version: Show current version.\n"
                                   ))
    input_group.add_argument("-c", "--custom-config", dest="custom_config",
                             type=is_file,
                             help="Custom configuration file.")
    input_group.add_argument("--base-config", dest="base_config",
                             type=is_file, default=BASEPATH+'/phylobuild.cfg',
                             help="Base configuration file.")
    input_group.add_argument("--tools-dir", dest="tools_dir",
                             type=str,
                             help="Custom path where external software is avaiable.")
    input_group.add_argument("-w", dest="workflow",
                             required=True, nargs='+',
                             help="One or more gene-tree workflow names. All the specified workflows will be executed using the same input data.")
    input_group.add_argument("-m", dest="supermatrix_workflow",
                             required=False, nargs='+',
                             help="One or more super-matrix workflow names. All the specified workflows will be executed using the same input data.")
    input_group.add_argument("-a", dest="aa_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " protein sequences.")
    input_group.add_argument("-n", dest="nt_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " nucleotide sequences")
    # input_group.add_argument("--seqformat", dest="seqformat",
    #                          choices=["fasta", "phylip", "iphylip", "phylip_relaxed", "iphylip_relaxed"],
    #                          default="fasta",
    #                          help="")
    input_group.add_argument("--dealign", dest="dealign",
                             action="store_true",
                             help="when used, gaps in the orginal fasta file will"
                             " be removed, thus allowing to use alignment files as input.")
    input_group.add_argument("--seq-name-parser", dest="seq_name_parser",
                             type=str,
                             help=("A Perl regular expression containing a matching group, which is"
                                   " used to parse sequence names from the input files. Use this option to"
                                   " customize the names that should be shown in the output files."
                                   " The matching group (the two parentheses) in the provided regular"
                                   " expression will be assumed as sequence name. By default, all "
                                   " characthers until the first blank space or tab delimiter are "
                                   " used as the sequence names."),
                             # FIX: raw string avoids the invalid "\s"
                             # escape warning; the value is unchanged.
                             default=r'^([^\s]+)')
    input_group.add_argument("--no-seq-rename", dest="seq_rename",
                             action="store_false",
                             help="If used, sequence names will NOT be"
                             " internally translated to 10-character-"
                             "identifiers.")
    input_group.add_argument("--no-seq-checks", dest="no_seq_checks",
                             action="store_true",
                             help="Skip consistency sequence checks for not allowed symbols, etc.")
    input_group.add_argument("--no-seq-correct", dest="no_seq_correct",
                             action="store_true",
                             help="Skip sequence compatibility changes: i.e. U, J and O symbols are converted into X by default.")

    dup_names_group = input_group.add_mutually_exclusive_group()
    dup_names_group.add_argument("--ignore-dup-seqnames", dest="ignore_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, a single random instance will be used."))
    dup_names_group.add_argument("--rename-dup-seqnames", dest="rename_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, duplicates will be renamed."))

    input_group.add_argument("--seqdb", dest="seqdb",
                             type=str,
                             help="Uses a custom sequence database file")

    # supermatrix workflow
    input_group.add_argument("--cogs", dest="cogs_file",
                             type=is_file,
                             help="A file defining clusters of orthologous groups."
                             " One per line. Tab delimited sequence ids. ")
    input_group.add_argument("--lineages", dest="lineages_file",
                             type=is_file,
                             help="EXPERIMENTAL:A file containing the (sorted) lineage "
                             "track of each species. It enables "
                             "NPR algorithm to fix what taxonomic "
                             "levels should be optimized."
                             "Note that linage tracks must consist in "
                             "a comma separated list of taxonomic levels "
                             "sorted from deeper to swallower clades "
                             "(i.e. 9606 [TAB] Eukaryotes,Mammals,Primates)")
    input_group.add_argument("--spname-delimiter", dest="spname_delimiter",
                             type=str, default="_",
                             help="spname_delimiter is used to split"
                             " the name of sequences into species code and"
                             " sequence identifier (i.e. HUMAN_p53 = HUMAN, p53)."
                             " Note that species name must always precede seq.identifier.")
    input_group.add_argument("--spfile", dest="spfile",
                             type=is_file,
                             help="If specified, only the sequences and ortholog"
                             " pairs matching the group of species in this file"
                             " (one species code per line) will be used. ")

    npr_group = parser.add_argument_group('==== NPR options ====')
    npr_group.add_argument("-r", "--recursive", dest="npr_workflows",
                           required=False, nargs="*",
                           help="EXPERIMENTAL:Enables recursive NPR capabilities (Nested Phylogenetic Reconstruction)"
                           " and specifies custom workflows and filters for each NPR iteration.")
    npr_group.add_argument("--nt-switch-threshold", dest="nt_switch_thr",
                           required=False, type=float, default = 0.95,
                           help="Sequence similarity at which nucleotide based alignments should be used"
                           " instead of amino-acids. ")
    npr_group.add_argument("--max-iters", dest="max_iters",
                           required=False, type=int, default=99999999,
                           help="EXPERIMENTAL:Set a maximum number of NPR iterations allowed.")
    npr_group.add_argument("--first-split-outgroup", dest="first_split",
                           type=str, default='midpoint',
                           help=("EXPERIMENTAL:When used, it overrides first_split option"
                                 " in any tree merger config block in the"
                                 " config file. Default: 'midpoint' "))

    # Output data related flags
    output_group = parser.add_argument_group('==== Output Options ====')
    output_group.add_argument("-o", "--outdir", dest="outdir",
                              type=str, required=True,
                              help="""Output directory for results.""")
    output_group.add_argument("--scratch-dir", dest="scratch_dir",
                              type=is_dir,
                              help="""If provided, ete-build will run on the scratch folder and all files will be transferred to the output dir when finished. """)
    output_group.add_argument("--db-dir", dest="db_dir",
                              type=is_dir,
                              help="""Alternative location of the database directory""")
    output_group.add_argument("--tasks-dir", dest="tasks_dir",
                              type=is_dir,
                              help="""Output directory for the executed processes (intermediate files).""")
    output_group.add_argument("--compress", action="store_true",
                              help="Compress all intermediate files when"
                              " a workflow is finished.")
    output_group.add_argument("--logfile", action="store_true",
                              help="Log messages will be saved into a file named npr.log within the output directory.")
    output_group.add_argument("--noimg", action="store_true",
                              help="Tree images will not be generated when a workflow is finished.")
    output_group.add_argument("--email", dest="email",
                              type=str,
                              help="EXPERIMENTAL:Send an email when errors occur or a workflow is done.")
    output_group.add_argument("--email-report-time", dest="email_report_time",
                              type=int, default = 0,
                              help="EXPERIMENTAL:How often (in minutes) an email reporting the status of the execution should be sent. 0=No reports")

    # Task execution related flags
    exec_group = parser.add_argument_group('==== Execution Mode Options ====')
    exec_group.add_argument("-C", "--cpu", dest="maxcores", type=int,
                            default=1, help="Maximum number of CPU cores"
                            " available in the execution host. If higher"
                            " than 1, tasks with multi-threading"
                            " capabilities will enabled. Note that this"
                            " number will work as a hard limit for all applications,"
                            "regardless of their specific configuration.")
    exec_group.add_argument("-t", "--schedule-time", dest="schedule_time",
                            type=float, default=2,
                            help="""How often (in secs) tasks should be checked for available results.""")
    exec_group.add_argument("--launch-time", dest="launch_time",
                            type=float, default=3,
                            help="""How often (in secs) queued jobs should be checked for launching""")

    exec_type_group = exec_group.add_mutually_exclusive_group()
    exec_type_group.add_argument("--noexec", dest="no_execute",
                                 action="store_true",
                                 help=("Prevents launching any external application."
                                       " Tasks will be processed and intermediate steps will"
                                       " run, but no real computation will be performed."))
    # exec_type_group.add_argument("--sge", dest="sge_execute",
    #                              action="store_true", help="EXPERIMENTAL!: Jobs will be"
    #                              " launched using the Sun Grid Engine"
    #                              " queue system.")
    exec_group.add_argument("--monitor", dest="monitor",
                            action="store_true",
                            help="Monitor mode: pipeline jobs will be"
                            " detached from the main process. This means that"
                            " when npr execution is interrupted, all currently"
                            " running jobs will keep running. Use this option if you"
                            " want to stop and recover an execution thread or"
                            " if jobs are expected to be executed remotely."
                            )
    exec_group.add_argument("--resume", dest="resume",
                            action="store_true",
                            help="If output directory exists, reuse data from it if possible. ")
    exec_group.add_argument("--clearall", dest="clearall",
                            action="store_true",
                            help="If output directory exists, erase all previous data and start a clean execution.")
    exec_group.add_argument("--softclear", dest="softclear",
                            action="store_true",
                            help="Clear all precomputed data (data.db), but keeps task raw data in the directory, so they can be re-processed.")
    exec_group.add_argument("--clear-seqdb", dest="clearseqs",
                            action="store_true",
                            help="Reload sequences deleting previous database if necessary.")
    # exec_group.add_argument("--arch", dest="arch",
    #                         choices=["auto", "32", "64"],
    #                         default="auto", help="Set the architecture of"
    #                         " execution hosts (needed only when using"
    #                         " built-in applications.)")
    exec_group.add_argument("--nochecks", dest="nochecks",
                            action="store_true",
                            help="Skip basic checks (i.e. tools available) everytime the application starts.")

    # Interface related flags
    ui_group = parser.add_argument_group("==== Program Interface Options ====")
    # ui_group.add_argument("-u", dest="enable_ui",
    #                       action="store_true", help="When used, a color"
    #                       " based interface is launched to monitor NPR"
    #                       " processes. This feature is EXPERIMENTAL and"
    #                       " requires NCURSES libraries installed in your"
    #                       " system.")
    ui_group.add_argument("-v", dest="verbosity",
                          default=0, type=int, choices=[0,1,2,3,4],
                          help="Verbosity level: 0=very quiet, 4=very "
                          " verbose.")
    ui_group.add_argument("--debug", nargs="?", const="all",
                          help="Start debugging"
                          " A taskid can be provided, so"
                          " debugging will start from such task on.")

    args = parser.parse_args(arguments)

    if args.tools_dir:
        APPSPATH = args.tools_dir

    if not pexist(APPSPATH):
        print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
        print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)

    args.enable_ui = False
    if not args.noimg:
        print('Testing ETE-build graphics support...')
        print('X11 DISPLAY = %s' %colorify(os.environ.get('DISPLAY', 'not detected!'), 'yellow'))
        print('(You can use --noimg to disable graphical capabilities)')
        try:
            from .. import Tree
            Tree().render('/tmp/etenpr_img_test.png')
        # FIX: narrowed from a bare "except:" so KeyboardInterrupt /
        # SystemExit are not swallowed and re-labelled as a config error.
        except Exception:
            raise ConfigError('img generation not supported')

    if not args.aa_seed_file and not args.nt_seed_file:
        parser.error('At least one input file argument (-a, -n) is required')

    outdir = os.path.abspath(args.outdir)
    # reject paths with a trailing separator (empty run name)
    final_dir, runpath = os.path.split(outdir)
    if not runpath:
        raise ValueError("Invalid outdir")

    GLOBALS["output_dir"] = os.path.abspath(args.outdir)

    if args.scratch_dir:
        # set paths for scratch folder for sqlite files
        print("Creating temporary scratch dir...", file=sys.stderr)
        base_scratch_dir = os.path.abspath(args.scratch_dir)
        scratch_dir = tempfile.mkdtemp(prefix='npr_tmp', dir=base_scratch_dir)
        GLOBALS["scratch_dir"] = scratch_dir
        GLOBALS["basedir"] = scratch_dir
    else:
        GLOBALS["basedir"] = GLOBALS["output_dir"]

    GLOBALS["first_split_outgroup"] = args.first_split
    GLOBALS["email"] = args.email
    GLOBALS["verbosity"] = args.verbosity
    GLOBALS["email_report_time"] = args.email_report_time * 60
    GLOBALS["launch_time"] = args.launch_time
    GLOBALS["cmdline"] = ' '.join(arguments)
    GLOBALS["threadinfo"] = defaultdict(dict)
    GLOBALS["seqtypes"] = set()
    GLOBALS["target_species"] = set()
    GLOBALS["target_sequences"] = set()
    GLOBALS["spname_delimiter"] = args.spname_delimiter
    GLOBALS["color_shell"] = True
    GLOBALS["citator"] = Citator()
    GLOBALS["lineages"] = None
    GLOBALS["cogs_file"] = None

    GLOBALS["citator"].add("ETE")

    if not pexist(GLOBALS["basedir"]):
        os.makedirs(GLOBALS["basedir"])

    # when killed, translate signal into exception so program can exit cleanly
    def raise_control_c(_signal, _frame):
        if GLOBALS.get('_background_scheduler', None):
            GLOBALS['_background_scheduler'].terminate()
        raise KeyboardInterrupt
    signal.signal(signal.SIGTERM, raise_control_c)

    # Start the application
    app_wrapper(main, args)