示例#1
0
 def run(self):
     #Load the multi-cactus project
     project = MultiCactusProject()
     project.readXML(self.args[0])
     #Create jobs to create the output sequences
     configNode = ET.parse(project.getConfigPath()).getroot()
     ConfigWrapper(configNode).substituteAllPredefinedConstantsWithLiterals() #This is necessary..
     #Create the preprocessor
     self.addChildTarget(CactusPreprocessor(project.getInputSequencePaths(), 
                                            CactusPreprocessor.getOutputSequenceFiles(project.getInputSequencePaths(), project.getOutputSequenceDir()),
                                            configNode))
     #Now build the progressive-down target
     schedule = Schedule()
     schedule.loadProject(project)
     schedule.compute()
     if self.options.event == None:
         self.options.event = project.mcTree.getRootName()
     assert self.options.event in project.expMap
     leafNames = [ project.mcTree.getName(i) for i in project.mcTree.getLeaves() ]
     self.options.globalLeafEventSet = set(leafNames)
     self.setFollowOnTarget(ProgressiveDown(self.options, project, self.options.event, schedule))
示例#2
0
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("outputHal", type=str, help="Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--database",
                        dest="database",
                        help="Database type: tokyo_cabinet or kyoto_tycoon"
                        " [default: %(default)s]",
                        default="kyoto_tycoon")
    parser.add_argument("--configFile",
                        dest="configFile",
                        help="Specify cactus configuration file",
                        default=None)
    parser.add_argument(
        "--root",
        dest="root",
        help="Name of ancestral node (which"
        " must appear in NEWICK tree in <seqfile>) to use as a "
        "root for the alignment.  Any genomes not below this node "
        "in the tree may be used as outgroups but will never appear"
        " in the output.  If no root is specifed then the root"
        " of the tree is used. ",
        default=None)
    parser.add_argument("--latest",
                        dest="latest",
                        action="store_true",
                        help="Use the latest, locally-built docker container "
                        "rather than pulling from quay.io")
    parser.add_argument("--binariesMode",
                        choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries",
                        default=None)

    options = parser.parse_args()
    options.cactusDir = getTempDirectory()

    setupBinaries(options)
    setLoggingFromOptions(options)

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None:
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5

    #Create the progressive cactus project
    projWrapper = ProjectWrapper(options)
    projWrapper.writeXml()

    pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    assert os.path.exists(pjPath)

    project = MultiCactusProject()

    if not os.path.isdir(options.cactusDir):
        os.makedirs(options.cactusDir)

    with Toil(options) as toil:
        importSingularityImage()
        #Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            project.readXML(pjPath)
            #import the sequences
            seqIDs = []
            for seq in project.getInputSequencePaths():
                if os.path.isdir(seq):
                    tmpSeq = getTempFile()
                    catFiles([
                        os.path.join(seq, subSeq) for subSeq in os.listdir(seq)
                    ], tmpSeq)
                    seq = tmpSeq
                seq = makeURL(seq)
                seqIDs.append(toil.importFile(seq))
            project.setInputSequenceIDs(seqIDs)

            #import cactus config
            if options.configFile:
                cactusConfigID = toil.importFile(makeURL(options.configFile))
            else:
                cactusConfigID = toil.importFile(
                    makeURL(project.getConfigPath()))
            logger.info("Setting config id to: %s" % cactusConfigID)
            project.setConfigID(cactusConfigID)

            project.syncToFileStore(toil)
            configNode = ET.parse(project.getConfigPath()).getroot()
            configWrapper = ConfigWrapper(configNode)
            configWrapper.substituteAllPredefinedConstantsWithLiterals()

            project.writeXML(pjPath)
            halID = toil.start(
                RunCactusPreprocessorThenProgressiveDown(
                    options, project, memory=configWrapper.getDefaultMemory()))

        toil.exportFile(halID, makeURL(options.outputHal))
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("outputHal", type=str, help = "Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--database", dest="database",
                      help="Database type: tokyo_cabinet or kyoto_tycoon"
                      " [default: %(default)s]",
                      default="kyoto_tycoon")
    parser.add_argument("--configFile", dest="configFile",
                      help="Specify cactus configuration file",
                      default=None)
    parser.add_argument("--root", dest="root", help="Name of ancestral node (which"
                      " must appear in NEWICK tree in <seqfile>) to use as a "
                      "root for the alignment.  Any genomes not below this node "
                      "in the tree may be used as outgroups but will never appear"
                      " in the output.  If no root is specifed then the root"
                      " of the tree is used. ", default=None)   
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None:
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5

    with Toil(options) as toil:
        importSingularityImage()
        #Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            options.cactusDir = getTempDirectory()
            #Create the progressive cactus project 
            projWrapper = ProjectWrapper(options)
            projWrapper.writeXml()

            pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                                  '%s_project.xml' % ProjectWrapper.alignmentDirName)
            assert os.path.exists(pjPath)

            project = MultiCactusProject()

            if not os.path.isdir(options.cactusDir):
                os.makedirs(options.cactusDir)

            project.readXML(pjPath)
            #import the sequences
            seqIDs = []
            print "Importing %s sequences" % (len(project.getInputSequencePaths()))
            for seq in project.getInputSequencePaths():
                if os.path.isdir(seq):
                    tmpSeq = getTempFile()
                    catFiles([os.path.join(seq, subSeq) for subSeq in os.listdir(seq)], tmpSeq)
                    seq = tmpSeq
                seq = makeURL(seq)
                seqIDs.append(toil.importFile(seq))
            project.setInputSequenceIDs(seqIDs)

            #import cactus config
            if options.configFile:
                cactusConfigID = toil.importFile(makeURL(options.configFile))
            else:
                cactusConfigID = toil.importFile(makeURL(project.getConfigPath()))
            project.setConfigID(cactusConfigID)

            project.syncToFileStore(toil)
            configNode = ET.parse(project.getConfigPath()).getroot()
            configWrapper = ConfigWrapper(configNode)
            configWrapper.substituteAllPredefinedConstantsWithLiterals()


            project.writeXML(pjPath)
            halID = toil.start(RunCactusPreprocessorThenProgressiveDown(options, project, memory=configWrapper.getDefaultMemory()))

        toil.exportFile(halID, makeURL(options.outputHal))