Пример #1
0
def importSingularityImage():
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            #       when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call(["singularity", "pull", "--size", "2000", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            except CalledProcessError:
                # Call failed, try without --size, required for singularity 3+
                check_call(["singularity", "pull", "--name", os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
Пример #2
0
def importSingularityImage():
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    if mode == "singularity":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # Singularity will complain if the image file already exists. Remove it.
        try:
            os.remove(imgPath)
        except OSError:
            # File doesn't exist
            pass
        # Singularity 2.4 broke the functionality that let --name
        # point to a path instead of a name in the CWD. So we change
        # to the proper directory manually, then change back after the
        # image is pulled.
        # NOTE: singularity writes images in the current directory only
        #       when SINGULARITY_CACHEDIR is not set
        oldCWD = os.getcwd()
        os.chdir(os.path.dirname(imgPath))
        # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
        check_call([
            "singularity", "pull", "--size", "2000", "--name",
            os.path.basename(imgPath), "docker://" + getDockerImage()
        ])
        os.chdir(oldCWD)
Пример #3
0
def importSingularityImage(options):
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity" and Toil.parseLocator(
            options.jobStore)[0] == "file":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            #       when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call([
                    "singularity", "pull", "--size", "2000", "--name",
                    os.path.basename(imgPath), "docker://" + getDockerImage()
                ])
            except CalledProcessError:
                # Call failed, try without --size, required for singularity 3+
                check_call([
                    "singularity", "pull", "--name",
                    os.path.basename(imgPath), "docker://" + getDockerImage()
                ])
            os.chdir(oldCWD)
        else:
            logger.info(
                "Using pre-built singularity image: '{}'".format(imgPath))
Пример #4
0
def main():
    parser = ArgumentParser()
    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("--outDir", help='Directory where the processed leaf sequence and ancestral sequences will be placed.'
                        ' Required when not using --wdl')
    parser.add_argument("--outSeqFile", help="Path for annotated Seq file output [default: outDir/seqFile]")
    parser.add_argument("--outHal", help="Output HAL file [default: outDir/out.hal]")
    parser.add_argument("--wdl", action="store_true", help="output wdl workflow instead of list of commands")
    parser.add_argument("--noLocalInputs", action="store_true", help="dont embed local input paths in WDL script (as they will need"
                        " to be respecified when running on Terra")
    parser.add_argument("--configFile", default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--preprocessBatchSize", type=int, default=3, help="size (number of genomes) of suggested preprocessing jobs")
    parser.add_argument("--jobStore", type=str, default="./jobstore", help="base directory of jobStores to use in suggested commands")
    parser.add_argument("--halOptions", type=str, default="--hdf5InMemory", help="options for every hal command")
    parser.add_argument("--cactusOptions", type=str, default="--realTimeLogging --logInfo", help="options for every cactus command")
    parser.add_argument("--preprocessOnly", action="store_true", help="only decompose into preprocessor and cactus jobs")
    parser.add_argument("--dockerImage", type=str, help="docker image to use as wdl runtime")
    
    parser.add_argument("--gpu", action="store_true", help="use gpu-enabled lastz in cactus-blast")
    parser.add_argument("--gpuType", default="nvidia-tesla-v100", help="GPU type (to set in WDL runtime parameters)")
    parser.add_argument("--gpuCount", default=1, help="GPU count (to set in WDL runtime parameters)")
    parser.add_argument("--nvidiaDriver", default="440.64.00", help="Nvidia driver version")
    parser.add_argument("--gpuZone", default="us-central1-c", help="zone used for gpu task")
    parser.add_argument("--zone", default="us-west2-a", help="zone used for all but gpu tasks")

    parser.add_argument("--defaultCores", type=int, help="Number of cores for each job unless otherwise specified")
    parser.add_argument("--preprocessCores", type=int, help="Number of cores for each cactus-preprocess job")
    parser.add_argument("--blastCores", type=int, help="Number of cores for each cactus-blast job")
    parser.add_argument("--alignCores", type=int, help="Number of cores for each cactus-align job")

    parser.add_argument("--defaultMem", type=float, help="Memory in GB for each job unless otherwise specified")
    parser.add_argument("--preprocessMem", type=float, help="Memory in GB for each cactus-preprocess job")
    parser.add_argument("--blastMem", type=float, help="Memory in GB for each cactus-blast job")
    parser.add_argument("--alignMem", type=float, help="Memory in GB for each cactus-align job")

    parser.add_argument("--defaultDisk", type=int, help="Disk in GB for each job unless otherwise specified")
    parser.add_argument("--preprocessDisk", type=int, help="Disk in GB for each cactus-preprocess job")
    parser.add_argument("--blastDisk", type=int, help="Disk in GB for each cactus-blast job")
    parser.add_argument("--alignDisk", type=int, help="Disk in GB for each cactus-align job")
    parser.add_argument("--halAppendDisk", type=int, help="Disk in GB for each halAppendSubtree job")

    parser.add_argument("--preprocessPreemptible", type=int, help="Preemptible in GB for each cactus-preprocess job [default=2]", default=2)
    parser.add_argument("--blastPreemptible", type=int, help="Preemptible in GB for each cactus-blast job [default=1]", default=1)
    parser.add_argument("--alignPreemptible", type=int, help="Preemptible in GB for each cactus-align job [default=1]", default=1)
    parser.add_argument("--halAppendPreemptible", type=int, help="Preemptible in GB for each halAppendSubtree job [default=1]", default=1)

    options = parser.parse_args()
    options.database = 'kyoto_tycoon'
    #todo support root option
    options.root = None

    if not options.wdl:
        if not options.outDir:
            raise RuntimeError("--outDir option required when not using --wdl")
        if not options.outSeqFile:
            options.outSeqFile = os.path.join(options.outDir, os.path.basename(options.seqFile))
            if os.path.abspath(options.seqFile) == os.path.abspath(options.outSeqFile):
                options.outSeqFile += '.1'
                
    if (not options.wdl or not options.gpu) and (options.gpuCount > 1 or options.gpuType != "nvidia-tesla-v100"):
        raise RuntimeError("--gpuType and gpuCount can only be used with --wdl --gpu")

    if not options.outHal:
        options.outHal = os.path.join(options.outDir if options.outDir else '', 'out.hal')

    if options.wdl:
        if options.preprocessBatchSize != 1:
            if options.preprocessBatchSize != 3:
                # hacky way to only warn for non-default
                sys.stderr.write("Warning: --preprocessBatchSize reset to 1 for --wdl support\n")
            options.preprocessBatchSize = 1
        # wdl handles output file structure
        if options.outDir:
            sys.stderr.write("Warning: --outDir option ignored with --wdl\n")
        options.outDir = "."
        if options.outSeqFile:
            sys.stderr.write("Warning: --outSeqFile option ignored with --wdl\n")
            options.outSeqFile = None
        if options.preprocessOnly:
            raise RuntimeError('--preprocessOnly cannot be used in conjunction with --wdl')
    if not options.dockerImage:
        options.dockerImage = getDockerImage()
    # apply defaults
    if options.defaultCores:
        if not options.preprocessCores:
            options.preprocessCores = options.defaultCores
        if not options.blastCores:
            options.blastCores = options.defaultCores
        if not options.alignCores:
            options.alignCores = options.defaultCores
    if options.defaultMem:
        if not options.preprocessMem:
            options.preprocessMem = options.defaultMem
        if not options.blastMem:
            options.blastMem = options.defaultMem
        if not options.alignMem:
            options.alignMem = options.defaultMem
    if not options.alignCores or options.alignCores == 1:
        if options.alignCores == 1:
            sys.stderr.write("Warning: --alignCores changed from 1 to 2\n")
        options.alignCores = 2
    if options.defaultDisk:
        if not options.preprocessDisk:
            options.preprocessDisk = options.defaultDisk
        if not options.blastDisk:
            options.blastDisk = options.defaultDisk
        if not options.alignDisk:
            options.alignDisk = options.defaultDisk
        if not options.halAppendDisk:
            options.halAppendDisk = options.defaultDisk

    # https://cromwell.readthedocs.io/en/stable/RuntimeAttributes/#gpucount-gputype-and-nvidiadriverversion
    # note: k80 not included as WGA_GPU doesn't run on it.  
    acceptable_gpus = ['nvidia-tesla-v100', 'nvidia-tesla-p100', 'nvidia-tesla-p4', 'nvidia-tesla-t4']
    if options.gpuType not in acceptable_gpus:
        raise RuntimeError('--gpuType {} not supported by Terra.  Acceptable types are {}'.format(
            options.gpuType, acceptable_gpus))

    # need to go through this garbage (copied from the main() in progressive_cactus) to
    # come up with the project
    options.cactusDir = getTempDirectory()
    #Create the progressive cactus project
    projWrapper = ProjectWrapper(options, options.configFile)
    projWrapper.writeXml()
    # used to unique jobstore
    options.jobStoreCount = 0

    pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    assert os.path.exists(pjPath)

    project = MultiCactusProject()

    if not os.path.isdir(options.cactusDir):
        os.makedirs(options.cactusDir)

    project.readXML(pjPath)

    enableDumpStack()
    cactusPrepare(options, project)
Пример #5
0
def main(toil_mode=False):
    parser = ArgumentParser()
    if toil_mode:
        Job.Runner.addToilOptions(parser)
        parser.add_argument("--latest", dest="latest", action="store_true",
                            help="Use the latest version of the docker container "
                            "rather than pulling one matching this version of cactus")
        parser.add_argument("--containerImage", dest="containerImage", default=None,
                            help="Use the the specified pre-built containter image "
                            "rather than pulling one from quay.io")
        parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                            help="The way to run the Cactus binaries (at top level; use --cactusOpts to set it in nested calls)",
                            default=None)
    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("--outDir", help='Directory where the processed leaf sequence and ancestral sequences will be placed.'
                        ' Required when not using --wdl')
    parser.add_argument("--outSeqFile", help="Path for annotated Seq file output [default: outDir/seqFile]")
    parser.add_argument("--outHal", help="Output HAL file [default: outDir/out.hal]", required=toil_mode)
    if not toil_mode:
        parser.add_argument("--wdl", action="store_true", help="output wdl workflow instead of list of commands")
        parser.add_argument("--noLocalInputs", action="store_true", help="dont embed local input paths in WDL script (as they will need"
                            " to be respecified when running on Terra")
        parser.add_argument("--jobStore", type=str, default="./jobstore", help="base directory of jobStores to use in suggested commands")
    parser.add_argument("--configFile", default=os.path.join(cactusRootPath(), "cactus_progressive_config.xml"))
    parser.add_argument("--preprocessBatchSize", type=int, default=3, help="size (number of genomes) of suggested preprocessing jobs")
    parser.add_argument("--halOptions", type=str, default="--hdf5InMemory", help="options for every hal command")
    parser.add_argument("--cactusOptions", type=str, default="--realTimeLogging --logInfo --retryCount 0", help="options for every cactus command")
    parser.add_argument("--preprocessOnly", action="store_true", help="only decompose into preprocessor and cactus jobs")
    parser.add_argument("--dockerImage", type=str, help="docker image to use as wdl runtime")
    
    parser.add_argument("--gpu", action="store_true", help="use gpu-enabled lastz in cactus-blast")
    parser.add_argument("--gpuType", default="nvidia-tesla-v100", help="GPU type (to set in WDL runtime parameters)")
    parser.add_argument("--gpuCount", default=1, help="GPU count (to set in WDL runtime parameters)")
    parser.add_argument("--nvidiaDriver", default="440.64.00", help="Nvidia driver version")
    parser.add_argument("--gpuZone", default="us-central1-c", help="zone used for gpu task")
    parser.add_argument("--zone", default="us-west2-a", help="zone used for all but gpu tasks")

    if not toil_mode:
        parser.add_argument("--defaultCores", type=int, help="Number of cores for each job unless otherwise specified")
    parser.add_argument("--preprocessCores", type=int, help="Number of cores for each cactus-preprocess job")
    parser.add_argument("--blastCores", type=int, help="Number of cores for each cactus-blast job")
    parser.add_argument("--alignCores", type=int, help="Number of cores for each cactus-align job")

    if not toil_mode:
        parser.add_argument("--defaultMemory", type=human2bytesN, help="Memory for each job unless otherwise specified. "
                            "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--preprocessMemory", type=human2bytesN, help="Memory for each cactus-preprocess job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--blastMemory", type=human2bytesN, help="Memory for each cactus-blast job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")                                
    parser.add_argument("--alignMemory", type=human2bytesN, help="Memory for each cactus-align job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")

    if not toil_mode:
        parser.add_argument("--defaultDisk", type=human2bytesN, help="Disk for each job unless otherwise specified. "
                            "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--preprocessDisk", type=human2bytesN, help="Disk for each cactus-preprocess job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--blastDisk", type=human2bytesN, help="Disk for each cactus-blast job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--alignDisk", type=human2bytesN, help="Disk for each cactus-align job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")
    parser.add_argument("--halAppendDisk", type=human2bytesN, help="Disk for each halAppendSubtree job. "
                        "Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes)")

    parser.add_argument("--preprocessPreemptible", type=int, help="Preemptible attempt count for each cactus-preprocess job [default=2]", default=2)
    parser.add_argument("--blastPreemptible", type=int, help="Preemptible attempt count for each cactus-blast job [default=1]", default=1)
    parser.add_argument("--alignPreemptible", type=int, help="Preemptible attempt count for each cactus-align job [default=1]", default=1)
    parser.add_argument("--halAppendPreemptible", type=int, help="Preemptible attempt count for each halAppendSubtree job [default=1]", default=1)
    parser.add_argument("--database", choices=["kyoto_tycoon", "redis"], help="The type of database", default="kyoto_tycoon")

    options = parser.parse_args()
    #todo support root option
    options.root = None

    if toil_mode:
        options.wdl = False
        options.noLocalInputs = False
        options.outDir = '.'
        setupBinaries(options)
        # need to avoid nested container calls, so set toil-inside-toil jobs to local by default
        if "--binariesMode" not in options.cactusOptions:
            options.cactusOptions += " --binariesMode local"
        if options.jobStore.startswith('aws'):
            if not options.outHal.startswith('s3://'):
                raise RuntimeError("--outHal must be s3:// address when using s3 job store")
            if not has_s3:
                raise RuntimeError("S3 support requires toil to be installed with [aws]")
    options.toil = toil_mode

    if not options.wdl and not options.toil:
        if not options.outDir:
            raise RuntimeError("--outDir option required when not using --wdl")
        if not options.outSeqFile:
            options.outSeqFile = os.path.join(options.outDir, os.path.basename(options.seqFile))
            if os.path.abspath(options.seqFile) == os.path.abspath(options.outSeqFile):
                options.outSeqFile += '.1'
                
    if (not options.wdl or not options.gpu) and (options.gpuCount > 1 or options.gpuType != "nvidia-tesla-v100"):
        raise RuntimeError("--gpuType and gpuCount can only be used with --wdl --gpu")

    if not options.outHal:
        options.outHal = os.path.join(options.outDir if options.outDir else '', 'out.hal')

    if options.wdl:
        # wdl handles output file structure
        if options.outDir:
            sys.stderr.write("Warning: --outDir option ignored with --wdl\n")
        options.outDir = "."
        if options.outSeqFile:
            sys.stderr.write("Warning: --outSeqFile option ignored with --wdl\n")
            options.outSeqFile = None
        if options.preprocessOnly:
            raise RuntimeError('--preprocessOnly cannot be used in conjunction with --wdl')
    if not options.dockerImage:
        options.dockerImage = getDockerImage()
    # apply defaults
    if options.defaultCores:
        if not options.preprocessCores:
            options.preprocessCores = options.defaultCores
        if not options.blastCores:
            options.blastCores = options.defaultCores
        if not options.alignCores:
            options.alignCores = options.defaultCores
    if options.defaultMemory:
        if not options.preprocessMemory:
            options.preprocessMemory = options.defaultMemory
        if not options.blastMemory:
            options.blastMemory = options.defaultMemory
        if not options.alignMemory:
            options.alignMemory = options.defaultMemory
    if not options.alignCores or options.alignCores == 1:
        if options.alignCores == 1:
            sys.stderr.write("Warning: --alignCores changed from 1 to 2\n")
        options.alignCores = 2
    if options.defaultDisk:
        if not options.preprocessDisk:
            options.preprocessDisk = options.defaultDisk
        if not options.blastDisk:
            options.blastDisk = options.defaultDisk
        if not options.alignDisk:
            options.alignDisk = options.defaultDisk
        if not options.halAppendDisk:
            options.halAppendDisk = options.defaultDisk

    # todo: no reason not to support non-1 batch size, but mirror wdl logic for now
    if options.toil:
        if options.preprocessBatchSize != 1:
            if options.preprocessBatchSize != 3:
                # hacky way to only warn for non-default
                sys.stderr.write("Warning: --preprocessBatchSize reset to 1 for --wdl support\n")
            options.preprocessBatchSize = 1
        # todo: could also support this
        assert not options.preprocessOnly

    # https://cromwell.readthedocs.io/en/stable/RuntimeAttributes/#gpucount-gputype-and-nvidiadriverversion
    # note: k80 not included as WGA_GPU doesn't run on it.  
    acceptable_gpus = ['nvidia-tesla-v100', 'nvidia-tesla-p100', 'nvidia-tesla-p4', 'nvidia-tesla-t4']
    if options.gpuType not in acceptable_gpus:
        raise RuntimeError('--gpuType {} not supported by Terra.  Acceptable types are {}'.format(
            options.gpuType, acceptable_gpus))

    # need to go through this garbage (copied from the main() in progressive_cactus) to
    # come up with the project
    options.cactusDir = getTempDirectory()
    #Create the progressive cactus project
    projWrapper = ProjectWrapper(options, options.configFile)
    projWrapper.writeXml()
    # used to unique jobstore
    options.jobStoreCount = 0

    pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    assert os.path.exists(pjPath)

    project = MultiCactusProject()

    if not os.path.isdir(options.cactusDir):
        os.makedirs(options.cactusDir)

    project.readXML(pjPath)

    enableDumpStack()
    cactusPrepare(options, project)