def importSingularityImage():
    """Import the Singularity image from Docker if using Singularity.

    Driven entirely by environment variables:
      - CACTUS_BINARIES_MODE: only acts when this is "singularity".
      - CACTUS_SINGULARITY_IMG: path where the image is (or will be) stored.
      - CACTUS_USE_LOCAL_SINGULARITY_IMG: if not "0", skip the pull and use
        the pre-built image already at CACTUS_SINGULARITY_IMG.
    """
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            # when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            try:
                # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
                try:
                    check_call(["singularity", "pull", "--size", "2000", "--name",
                                os.path.basename(imgPath), "docker://" + getDockerImage()])
                except CalledProcessError:
                    # Call failed, try without --size, required for singularity 3+
                    check_call(["singularity", "pull", "--name",
                                os.path.basename(imgPath), "docker://" + getDockerImage()])
            finally:
                # BUGFIX: restore the working directory even when the pull
                # fails, so callers are not left stranded in the image dir.
                os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
def importSingularityImage():
    """Import the Singularity image from Docker if using Singularity.

    Reads CACTUS_BINARIES_MODE (only acts when "singularity") and
    CACTUS_SINGULARITY_IMG (destination path for the pulled image).
    """
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    if mode == "singularity":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # Singularity will complain if the image file already exists. Remove it.
        try:
            os.remove(imgPath)
        except OSError:
            # File doesn't exist
            pass
        # Singularity 2.4 broke the functionality that let --name
        # point to a path instead of a name in the CWD. So we change
        # to the proper directory manually, then change back after the
        # image is pulled.
        # NOTE: singularity writes images in the current directory only
        # when SINGULARITY_CACHEDIR is not set
        oldCWD = os.getcwd()
        os.chdir(os.path.dirname(imgPath))
        try:
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            check_call(["singularity", "pull", "--size", "2000", "--name",
                        os.path.basename(imgPath), "docker://" + getDockerImage()])
        finally:
            # BUGFIX: restore the working directory even when the pull
            # fails, so callers are not left stranded in the image dir.
            os.chdir(oldCWD)
def importSingularityImage(options):
    """Import the Singularity image from Docker if using Singularity.

    Only acts when CACTUS_BINARIES_MODE is "singularity" AND the job store
    in *options.jobStore* is a local "file" locator. Reads
    CACTUS_SINGULARITY_IMG (image path) and
    CACTUS_USE_LOCAL_SINGULARITY_IMG (if not "0", skip the pull and use the
    pre-built image at that path).
    """
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity" and Toil.parseLocator(options.jobStore)[0] == "file":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            # when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            try:
                # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
                try:
                    check_call(["singularity", "pull", "--size", "2000", "--name",
                                os.path.basename(imgPath), "docker://" + getDockerImage()])
                except CalledProcessError:
                    # Call failed, try without --size, required for singularity 3+
                    check_call(["singularity", "pull", "--name",
                                os.path.basename(imgPath), "docker://" + getDockerImage()])
            finally:
                # BUGFIX: restore the working directory even when the pull
                # fails, so callers are not left stranded in the image dir.
                os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
def main():
    """Entry point: parse options, build the progressive-cactus project for
    the input seq file, and decompose the alignment into a plan of
    preprocess / blast / align / hal commands (or a WDL workflow with --wdl)
    via cactusPrepare.
    """
    parser = ArgumentParser()
    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("--outDir", help='Directory where the processed leaf sequence and ancestral sequences will be placed.'
                        ' Required when not using --wdl')
    parser.add_argument("--outSeqFile", help="Path for annotated Seq file output [default: outDir/seqFile]")
    parser.add_argument("--outHal", help="Output HAL file [default: outDir/out.hal]")
    parser.add_argument("--wdl", action="store_true", help="output wdl workflow instead of list of commands")
    parser.add_argument("--noLocalInputs", action="store_true", help="dont embed local input paths in WDL script (as they will need"
                        " to be respecified when running on Terra")
    parser.add_argument("--configFile", default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--preprocessBatchSize", type=int, default=3, help="size (number of genomes) of suggested preprocessing jobs")
    parser.add_argument("--jobStore", type=str, default="./jobstore", help="base directory of jobStores to use in suggested commands")
    parser.add_argument("--halOptions", type=str, default="--hdf5InMemory", help="options for every hal command")
    parser.add_argument("--cactusOptions", type=str, default="--realTimeLogging --logInfo", help="options for every cactus command")
    parser.add_argument("--preprocessOnly", action="store_true", help="only decompose into preprocessor and cactus jobs")
    parser.add_argument("--dockerImage", type=str, help="docker image to use as wdl runtime")
    parser.add_argument("--gpu", action="store_true", help="use gpu-enabled lastz in cactus-blast")
    parser.add_argument("--gpuType", default="nvidia-tesla-v100", help="GPU type (to set in WDL runtime parameters)")
    # BUGFIX: type=int added. Without it a command-line value arrives as a
    # str, and the `options.gpuCount > 1` comparison below raises TypeError.
    parser.add_argument("--gpuCount", type=int, default=1, help="GPU count (to set in WDL runtime parameters)")
    parser.add_argument("--nvidiaDriver", default="440.64.00", help="Nvidia driver version")
    parser.add_argument("--gpuZone", default="us-central1-c", help="zone used for gpu task")
    parser.add_argument("--zone", default="us-west2-a", help="zone used for all but gpu tasks")
    parser.add_argument("--defaultCores", type=int, help="Number of cores for each job unless otherwise specified")
    parser.add_argument("--preprocessCores", type=int, help="Number of cores for each cactus-preprocess job")
    parser.add_argument("--blastCores", type=int, help="Number of cores for each cactus-blast job")
    parser.add_argument("--alignCores", type=int, help="Number of cores for each cactus-align job")
    parser.add_argument("--defaultMem", type=float, help="Memory in GB for each job unless otherwise specified")
    parser.add_argument("--preprocessMem", type=float, help="Memory in GB for each cactus-preprocess job")
    parser.add_argument("--blastMem", type=float, help="Memory in GB for each cactus-blast job")
    parser.add_argument("--alignMem", type=float, help="Memory in GB for each cactus-align job")
    parser.add_argument("--defaultDisk", type=int, help="Disk in GB for each job unless otherwise specified")
    parser.add_argument("--preprocessDisk", type=int, help="Disk in GB for each cactus-preprocess job")
    parser.add_argument("--blastDisk", type=int, help="Disk in GB for each cactus-blast job")
    parser.add_argument("--alignDisk", type=int, help="Disk in GB for each cactus-align job")
    parser.add_argument("--halAppendDisk", type=int, help="Disk in GB for each halAppendSubtree job")
    parser.add_argument("--preprocessPreemptible", type=int, help="Preemptible in GB for each cactus-preprocess job [default=2]", default=2)
    parser.add_argument("--blastPreemptible", type=int, help="Preemptible in GB for each cactus-blast job [default=1]", default=1)
    parser.add_argument("--alignPreemptible", type=int, help="Preemptible in GB for each cactus-align job [default=1]", default=1)
    parser.add_argument("--halAppendPreemptible", type=int, help="Preemptible in GB for each halAppendSubtree job [default=1]", default=1)

    options = parser.parse_args()
    options.database = 'kyoto_tycoon'
    #todo support root option
    options.root = None

    # Without --wdl, output paths must be resolvable locally.
    if not options.wdl:
        if not options.outDir:
            raise RuntimeError("--outDir option required when not using --wdl")
        if not options.outSeqFile:
            options.outSeqFile = os.path.join(options.outDir, os.path.basename(options.seqFile))
            # Don't clobber the input seq file if outDir == its directory.
            if os.path.abspath(options.seqFile) == os.path.abspath(options.outSeqFile):
                options.outSeqFile += '.1'

    if (not options.wdl or not options.gpu) and (options.gpuCount > 1 or options.gpuType != "nvidia-tesla-v100"):
        raise RuntimeError("--gpuType and gpuCount can only be used with --wdl --gpu")

    if not options.outHal:
        options.outHal = os.path.join(options.outDir if options.outDir else '', 'out.hal')

    if options.wdl:
        if options.preprocessBatchSize != 1:
            if options.preprocessBatchSize != 3:
                # hacky way to only warn for non-default
                sys.stderr.write("Warning: --preprocessBatchSize reset to 1 for --wdl support\n")
            options.preprocessBatchSize = 1
        # wdl handles output file structure
        if options.outDir:
            sys.stderr.write("Warning: --outDir option ignored with --wdl\n")
        options.outDir = "."
        if options.outSeqFile:
            sys.stderr.write("Warning: --outSeqFile option ignored with --wdl\n")
            options.outSeqFile = None
        if options.preprocessOnly:
            raise RuntimeError('--preprocessOnly cannot be used in conjunction with --wdl')
    if not options.dockerImage:
        options.dockerImage = getDockerImage()

    # apply defaults: per-phase values fall back to the --default* options
    if options.defaultCores:
        if not options.preprocessCores:
            options.preprocessCores = options.defaultCores
        if not options.blastCores:
            options.blastCores = options.defaultCores
        if not options.alignCores:
            options.alignCores = options.defaultCores
    if options.defaultMem:
        if not options.preprocessMem:
            options.preprocessMem = options.defaultMem
        if not options.blastMem:
            options.blastMem = options.defaultMem
        if not options.alignMem:
            options.alignMem = options.defaultMem
    # align always gets at least 2 cores (warn only if 1 was requested explicitly)
    if not options.alignCores or options.alignCores == 1:
        if options.alignCores == 1:
            sys.stderr.write("Warning: --alignCores changed from 1 to 2\n")
        options.alignCores = 2
    if options.defaultDisk:
        if not options.preprocessDisk:
            options.preprocessDisk = options.defaultDisk
        if not options.blastDisk:
            options.blastDisk = options.defaultDisk
        if not options.alignDisk:
            options.alignDisk = options.defaultDisk
        if not options.halAppendDisk:
            options.halAppendDisk = options.defaultDisk

    # https://cromwell.readthedocs.io/en/stable/RuntimeAttributes/#gpucount-gputype-and-nvidiadriverversion
    # note: k80 not included as WGA_GPU doesn't run on it.
    acceptable_gpus = ['nvidia-tesla-v100', 'nvidia-tesla-p100', 'nvidia-tesla-p4', 'nvidia-tesla-t4']
    if options.gpuType not in acceptable_gpus:
        raise RuntimeError('--gpuType {} not supported by Terra. Acceptable types are {}'.format(
            options.gpuType, acceptable_gpus))

    # need to go through this garbage (copied from the main() in progressive_cactus) to
    # come up with the project
    options.cactusDir = getTempDirectory()
    #Create the progressive cactus project
    projWrapper = ProjectWrapper(options, options.configFile)
    projWrapper.writeXml()
    # used to unique jobstore
    options.jobStoreCount = 0

    pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    assert os.path.exists(pjPath)

    project = MultiCactusProject()

    if not os.path.isdir(options.cactusDir):
        os.makedirs(options.cactusDir)

    project.readXML(pjPath)

    enableDumpStack()
    cactusPrepare(options, project)
def main(toil_mode=False):
    """Entry point for cactus-prepare.

    With toil_mode=False (default), emits a plan of preprocess / blast /
    align / hal commands (or a WDL workflow with --wdl). With
    toil_mode=True, acts as the cactus-prepare-toil driver: Toil supplies
    the job-store and resource options and the plan runs as one workflow.
    """
    parser = ArgumentParser()
    if toil_mode:
        # Toil supplies its own options here, including --defaultCores /
        # --defaultMemory / --defaultDisk referenced further down; that is
        # why the "if not toil_mode" guards below skip re-adding them.
        Job.Runner.addToilOptions(parser)
        parser.add_argument("--latest", dest="latest", action="store_true",
                            help="Use the latest version of the docker container "
                            "rather than pulling one matching this version of cactus")
        # BUGFIX: help text had "the the specified pre-built containter image"
        parser.add_argument("--containerImage", dest="containerImage", default=None,
                            help="Use the specified pre-built container image "
                            "rather than pulling one from quay.io")
        parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                            help="The way to run the Cactus binaries (at top level; use --cactusOpts to set it in nested calls)",
                            default=None)
    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("--outDir", help='Directory where the processed leaf sequence and ancestral sequences will be placed.'
                        ' Required when not using --wdl')
    parser.add_argument("--outSeqFile", help="Path for annotated Seq file output [default: outDir/seqFile]")
    parser.add_argument("--outHal", help="Output HAL file [default: outDir/out.hal]", required=toil_mode)
    if not toil_mode:
        parser.add_argument("--wdl", action="store_true", help="output wdl workflow instead of list of commands")
        parser.add_argument("--noLocalInputs", action="store_true", help="dont embed local input paths in WDL script (as they will need"
                            " to be respecified when running on Terra")
        parser.add_argument("--jobStore", type=str, default="./jobstore", help="base directory of jobStores to use in suggested commands")
    parser.add_argument("--configFile", default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--preprocessBatchSize", type=int, default=3, help="size (number of genomes) of suggested preprocessing jobs")
    parser.add_argument("--halOptions", type=str, default="--hdf5InMemory", help="options for every hal command")
    parser.add_argument("--cactusOptions", type=str, default="--realTimeLogging --logInfo --retryCount 0", help="options for every cactus command")
    parser.add_argument("--preprocessOnly", action="store_true", help="only decompose into preprocessor and cactus jobs")
    parser.add_argument("--dockerImage", type=str, help="docker image to use as wdl runtime")
    parser.add_argument("--gpu", action="store_true", help="use gpu-enabled lastz in cactus-blast")
    parser.add_argument("--gpuType", default="nvidia-tesla-v100", help="GPU type (to set in WDL runtime parameters)")
    # BUGFIX: type=int added. Without it a command-line value arrives as a
    # str, and the `options.gpuCount > 1` comparison below raises TypeError.
    parser.add_argument("--gpuCount", type=int, default=1, help="GPU count (to set in WDL runtime parameters)")
    parser.add_argument("--nvidiaDriver", default="440.64.00", help="Nvidia driver version")
    parser.add_argument("--gpuZone", default="us-central1-c", help="zone used for gpu task")
    parser.add_argument("--zone", default="us-west2-a", help="zone used for all but gpu tasks")
    if not toil_mode:
        parser.add_argument("--defaultCores", type=int, help="Number of cores for each job unless otherwise specified")
    parser.add_argument("--preprocessCores", type=int, help="Number of cores for each cactus-preprocess job")
    parser.add_argument("--blastCores", type=int, help="Number of cores for each cactus-blast job")
    parser.add_argument("--alignCores", type=int, help="Number of cores for each cactus-align job")
    if not toil_mode:
        parser.add_argument("--defaultMemory", type=human2bytesN, help="Memory for each job unless otherwise specified. "
                            "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--preprocessMemory", type=human2bytesN, help="Memory for each cactus-preprocess job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--blastMemory", type=human2bytesN, help="Memory for each cactus-blast job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--alignMemory", type=human2bytesN, help="Memory for each cactus-align job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    if not toil_mode:
        parser.add_argument("--defaultDisk", type=human2bytesN, help="Disk for each job unless otherwise specified. "
                            "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--preprocessDisk", type=human2bytesN, help="Disk for each cactus-preprocess job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--blastDisk", type=human2bytesN, help="Disk for each cactus-blast job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--alignDisk", type=human2bytesN, help="Disk for each cactus-align job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--halAppendDisk", type=human2bytesN, help="Disk for each halAppendSubtree job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--preprocessPreemptible", type=int, help="Preemptible attempt count for each cactus-preprocess job [default=2]", default=2)
    parser.add_argument("--blastPreemptible", type=int, help="Preemptible attempt count for each cactus-blast job [default=1]", default=1)
    parser.add_argument("--alignPreemptible", type=int, help="Preemptible attempt count for each cactus-align job [default=1]", default=1)
    parser.add_argument("--halAppendPreemptible", type=int, help="Preemptible attempt count for each halAppendSubtree job [default=1]", default=1)
    parser.add_argument("--database", choices=["kyoto_tycoon", "redis"], help="The type of database", default="kyoto_tycoon")

    options = parser.parse_args()
    #todo support root option
    options.root = None

    if toil_mode:
        # Fill in the options that only exist as argparse flags outside toil mode.
        options.wdl = False
        options.noLocalInputs = False
        options.outDir = '.'
        setupBinaries(options)
        # need to avoid nested container calls, so set toil-inside-toil jobs to local by default
        if "--binariesMode" not in options.cactusOptions:
            options.cactusOptions += " --binariesMode local"
        if options.jobStore.startswith('aws'):
            if not options.outHal.startswith('s3://'):
                raise RuntimeError("--outHal must be s3:// address when using s3 job store")
            if not has_s3:
                raise RuntimeError("S3 support requires toil to be installed with [aws]")
    options.toil = toil_mode

    # Without --wdl / toil, output paths must be resolvable locally.
    if not options.wdl and not options.toil:
        if not options.outDir:
            raise RuntimeError("--outDir option required when not using --wdl")
        if not options.outSeqFile:
            options.outSeqFile = os.path.join(options.outDir, os.path.basename(options.seqFile))
            # Don't clobber the input seq file if outDir == its directory.
            if os.path.abspath(options.seqFile) == os.path.abspath(options.outSeqFile):
                options.outSeqFile += '.1'

    if (not options.wdl or not options.gpu) and (options.gpuCount > 1 or options.gpuType != "nvidia-tesla-v100"):
        raise RuntimeError("--gpuType and gpuCount can only be used with --wdl --gpu")

    if not options.outHal:
        options.outHal = os.path.join(options.outDir if options.outDir else '', 'out.hal')

    if options.wdl:
        # wdl handles output file structure
        if options.outDir:
            sys.stderr.write("Warning: --outDir option ignored with --wdl\n")
        options.outDir = "."
        if options.outSeqFile:
            sys.stderr.write("Warning: --outSeqFile option ignored with --wdl\n")
            options.outSeqFile = None
        if options.preprocessOnly:
            raise RuntimeError('--preprocessOnly cannot be used in conjunction with --wdl')
    if not options.dockerImage:
        options.dockerImage = getDockerImage()

    # apply defaults: per-phase values fall back to the --default* options
    if options.defaultCores:
        if not options.preprocessCores:
            options.preprocessCores = options.defaultCores
        if not options.blastCores:
            options.blastCores = options.defaultCores
        if not options.alignCores:
            options.alignCores = options.defaultCores
    if options.defaultMemory:
        if not options.preprocessMemory:
            options.preprocessMemory = options.defaultMemory
        if not options.blastMemory:
            options.blastMemory = options.defaultMemory
        if not options.alignMemory:
            options.alignMemory = options.defaultMemory
    # align always gets at least 2 cores (warn only if 1 was requested explicitly)
    if not options.alignCores or options.alignCores == 1:
        if options.alignCores == 1:
            sys.stderr.write("Warning: --alignCores changed from 1 to 2\n")
        options.alignCores = 2
    if options.defaultDisk:
        if not options.preprocessDisk:
            options.preprocessDisk = options.defaultDisk
        if not options.blastDisk:
            options.blastDisk = options.defaultDisk
        if not options.alignDisk:
            options.alignDisk = options.defaultDisk
        if not options.halAppendDisk:
            options.halAppendDisk = options.defaultDisk

    # todo: no reason not to support non-1 batch size, but mirror wdl logic for now
    if options.toil:
        if options.preprocessBatchSize != 1:
            if options.preprocessBatchSize != 3:
                # hacky way to only warn for non-default
                sys.stderr.write("Warning: --preprocessBatchSize reset to 1 for --wdl support\n")
            options.preprocessBatchSize = 1
        # todo: could also support this
        assert not options.preprocessOnly

    # https://cromwell.readthedocs.io/en/stable/RuntimeAttributes/#gpucount-gputype-and-nvidiadriverversion
    # note: k80 not included as WGA_GPU doesn't run on it.
    acceptable_gpus = ['nvidia-tesla-v100', 'nvidia-tesla-p100', 'nvidia-tesla-p4', 'nvidia-tesla-t4']
    if options.gpuType not in acceptable_gpus:
        raise RuntimeError('--gpuType {} not supported by Terra. Acceptable types are {}'.format(
            options.gpuType, acceptable_gpus))

    # need to go through this garbage (copied from the main() in progressive_cactus) to
    # come up with the project
    options.cactusDir = getTempDirectory()
    #Create the progressive cactus project
    projWrapper = ProjectWrapper(options, options.configFile)
    projWrapper.writeXml()
    # used to unique jobstore
    options.jobStoreCount = 0

    pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    assert os.path.exists(pjPath)

    project = MultiCactusProject()

    if not os.path.isdir(options.cactusDir):
        os.makedirs(options.cactusDir)

    project.readXML(pjPath)

    enableDumpStack()
    cactusPrepare(options, project)