Пример #1
0
 def RunAnalysis(self, qSpeciesTree=True):
     """
     Compute the gene distances, then infer the gene trees and the unrooted species tree.

     qSpeciesTree - if True, also write a copy of the species tree in which the
                    IDs are replaced using self.ogSet.SpeciesDict()
     Returns:
     (spTreeFN_ids, qSTAG) - filename of the ID-labelled species tree (None when
                    qSpeciesTree is False) and whether STAG was used to infer it
     """
     util.PrintUnderline("Calculating gene distances")
     ogs, ogMatrices_partial = self.GetOGMatrices_FullParallel()
     ogMatrices = self.CompleteAndWriteOGMatrices(ogs, ogMatrices_partial)
     util.PrintTime("Done")
     cmds_trees = self.PrepareGeneTreeCommand()
     speciesToUse = self.ogSet.seqsInfo.speciesToUse
     if len(speciesToUse) >= 4:
         qSTAG = self.EnoughOGsForSTAG(ogs, speciesToUse)
         if not qSTAG:
             print("Using fallback species tree inference method")
             D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
             cmd_spTree, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
             # The species tree command is placed ahead of the gene tree commands
             cmds_trees = [[cmd_spTree]] + cmds_trees
     else:
         # Fewer than four species: the species tree file is written directly
         qSTAG = False
         spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
         WriteSpeciesTreeIDs_TwoThree(speciesToUse, spTreeFN_ids)
     util.PrintUnderline("Inferring gene and species trees")
     util.RunParallelOrderedCommandLists(self.nProcesses, cmds_trees)
     if qSTAG:
         # STAG works from the gene trees, so it can only run once they are complete
         print("")
         spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
         stag.Run_ForOrthoFinder(files.FileHandler.GetOGsTreeDir(),
                                 files.FileHandler.GetWorkingDirectory_Write(),
                                 speciesToUse,
                                 spTreeFN_ids)
     # Write copies of the gene trees with the sequence IDs renamed
     seqDict = self.ogSet.Spec_SeqDict()
     nOGs = len(self.ogSet.OGs())
     for iog in xrange(nOGs):
         treeFN_ids = files.FileHandler.GetOGsTreeFN(iog)
         treeFN_renamed = files.FileHandler.GetOGsTreeFN(iog, True)
         util.RenameTreeTaxa(treeFN_ids, treeFN_renamed, seqDict, qSupport=False, qFixNegatives=True)
     if not qSpeciesTree:
         return None, qSTAG
     util.RenameTreeTaxa(spTreeFN_ids,
                         files.FileHandler.GetSpeciesTreeUnrootedFN(True),
                         self.ogSet.SpeciesDict(),
                         qSupport=False,
                         qFixNegatives=True)
     return spTreeFN_ids, qSTAG
Пример #2
0
    def RunAnalysis(self, qSpeciesTree=True):
        """
        Infer the species tree and the gene trees from the orthogroup distance matrices.

        qSpeciesTree - if True, also write "SpeciesTree_unrooted.txt" in the working
                       directory with the IDs replaced using self.ogSet.SpeciesDict()
        Returns:
        (number of orthogroups, D, species tree filename (IDs), renamed species tree filename);
        the two filenames are None when qSpeciesTree is False
        """
        ogs, ogMatrices_partial = self.GetOGMatrices()
        ogMatrices = self.CompleteAndWriteOGMatrices(ogs, ogMatrices_partial)

        D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
        cmd_spTree, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
        # Species tree command first, followed by the gene tree commands
        allCommands = [[cmd_spTree]] + self.PrepareGeneTreeCommand()
        util.PrintUnderline("Inferring gene and species trees")
        util.RunParallelOrderedCommandLists(self.nProcesses, allCommands, qHideStdout=True)

        # Write copies of the gene trees with the sequence IDs renamed
        seqDict = self.ogSet.Spec_SeqDict()
        nOGs = len(self.ogSet.OGs())
        for iog in xrange(nOGs):
            util.RenameTreeTaxa(self.TreeFilename_IDs(iog), self.treesPat % iog, seqDict, qFixNegatives=True)

        if not qSpeciesTree:
            return len(ogs), D, None, None
        spTreeUnrootedFN = self.workingDir + "SpeciesTree_unrooted.txt"
        util.RenameTreeTaxa(spTreeFN_ids, spTreeUnrootedFN, self.ogSet.SpeciesDict(), qFixNegatives=True)
        return len(ogs), D, spTreeFN_ids, spTreeUnrootedFN
Пример #3
0
def ReconciliationAndOrthologues(treesIDsPatFn, ogSet, speciesTree_fn, workingDir, resultsDir, reconTreesRenamedDir, nParallel, iSpeciesTree=None, pickleDir=None):
    """
    Run DLCpar on the gene trees, write renamed copies of the reconciled trees
    and create the orthologue lists.

    treesIDsPatFn - function returning name of filename (passed through to RunDlcpar)
    ogSet - info about the orthogroups, species etc
    speciesTree_fn - the species tree
    workingDir - Orthologues working dir
    resultsDir - where the Orthologues top level results directory will go (should exist already)
    reconTreesRenamedDir - where to put the reconcilled trees that use the gene accessions
    nParallel - passed through to RunDlcpar
    iSpeciesTree - which of the potential roots of the species tree is this
    pickleDir - directory handed to rt.create_orthologue_lists; if None, a
                "matrices_orthologues/" directory is created inside workingDir
                and removed again afterwards (best effort)
    """
    dlcparResultsDir = RunDlcpar(treesIDsPatFn, ogSet, speciesTree_fn, workingDir, nParallel)
    if not os.path.exists(reconTreesRenamedDir):
        os.mkdir(reconTreesRenamedDir)
    # The ID -> accession dictionary does not change between orthogroups, so build it once
    spec_seq_dict = ogSet.Spec_SeqDict()
    for iog in xrange(len(ogSet.OGs())):
        util.RenameTreeTaxa(dlcparResultsDir + "OG%07d_tree_id.dlcpar.locus.tree" % iog,
                            reconTreesRenamedDir + "OG%07d_tree.txt" % iog,
                            spec_seq_dict,
                            qFixNegatives=False,
                            inFormat=8)

    # Orthologue lists
    util.PrintUnderline("Inferring orthologues from gene trees" + (" (root %d)" % iSpeciesTree if iSpeciesTree is not None else ""))
    qDelDir = False
    if pickleDir is None:
        pickleDir = workingDir + "matrices_orthologues/"
        if not os.path.exists(pickleDir):
            os.mkdir(pickleDir)
        qDelDir = True
    rt.create_orthologue_lists(ogSet, resultsDir, dlcparResultsDir, pickleDir)
    # If a temporary matrices directory was created, delete it now
    if qDelDir and os.path.exists(pickleDir):
        try:
            # os.rmdir only removes an empty directory; a failure here is deliberately ignored
            os.rmdir(pickleDir)
        except OSError:
            pass
Пример #4
0
def DoOrthologuesForOrthoFinder_Phyldog(ogSet, workingDirectory, GeneToSpecies, output_dir, reconTreesRenamedDir):
    """
    Infer orthologues from the Phyldog result trees and write them to file.

    ogSet - info about the orthogroups, species etc
    workingDirectory - not used by this function
    GeneToSpecies - passed through to GetOrthologues_from_phyldog_tree
    output_dir - directory in which the "Orthologues_<species>/" directories are created
    reconTreesRenamedDir - where the renamed copies of the result trees are written
    Returns:
    nOrthologues_SpPair - util.nOrtho_sp(nspecies) accumulated with the results of
                          AppendOrthologuesToFiles for every orthogroup
    """
    speciesDict = ogSet.SpeciesDict()
    SequenceDict = ogSet.SequenceDict()
    # Write directory and file structure: one directory per species containing
    # one header-only file for each other species
    speciesIDs = ogSet.speciesToUse
    nspecies = len(speciesIDs)
    speciesNames = [speciesDict[str(sp)] for sp in speciesIDs]
    for index1, name1 in enumerate(speciesNames):
        d = output_dir + "Orthologues_" + name1 + "/"
        if not os.path.exists(d):
            os.mkdir(d)
        for index2, name2 in enumerate(speciesNames):
            if index2 == index1:
                continue
            with open(d + '%s__v__%s.tsv' % (name1, name2), 'wb') as outfile:
                writer1 = csv.writer(outfile, delimiter="\t")
                writer1.writerow(("Orthogroup", name1, name2))
    nOgs = len(ogSet.OGs())
    nOrthologues_SpPair = util.nOrtho_sp(nspecies)
    # Loop invariants: the ID dictionaries and the progress-report interval
    spec_seq_dict = ogSet.Spec_SeqDict()
    progressStep = 10 if nOgs <= 200 else 100 if nOgs <= 2000 else 1000
    with open(files.FileHandler.GetDuplicationsFN(), 'wb') as outfile:
        dupWriter = csv.writer(outfile, delimiter="\t")
        dupWriter.writerow(["Orthogroup", "Species Tree Node", "Gene Tree Node", "Support", "Type", "Genes 1", "Genes 2"])
        for iog in xrange(nOgs):
            recon_tree = files.FileHandler.GetPhyldogOGResultsTreeFN(iog)
            orthologues = GetOrthologues_from_phyldog_tree(iog, recon_tree, GeneToSpecies, dupsWriter=dupWriter, seqIDs=spec_seq_dict, spIDs=speciesDict)
            allOrthologues = [(iog, orthologues)]
            util.RenameTreeTaxa(recon_tree, reconTreesRenamedDir + "OG%07d_tree.txt" % iog, spec_seq_dict, qSupport=False, qFixNegatives=True, label='n')
            if iog % progressStep == 0:
                util.PrintTime("Done %d of %d" % (iog, nOgs))
            nOrthologues_SpPair += AppendOrthologuesToFiles(allOrthologues, speciesDict, ogSet.speciesToUse, SequenceDict, output_dir, False)
    return nOrthologues_SpPair
Пример #5
0
 def SpeciesTreeOnly(self):
     """
     Infer only the species tree and write a copy with the IDs renamed.

     Returns:
     (spTreeFN_ids, spTreeUnrootedFN) - the ID-labelled species tree filename and
     the filename of the renamed copy ("SpeciesTree_unrooted.txt" in the working dir)
     """
     ogs, partialMatrices = self.GetOGMatrices()
     completedMatrices = self.CompleteOGMatrices(ogs, partialMatrices)
     D, spPairs = self.SpeciesTreeDistances(ogs, completedMatrices)
     cmd_spTree, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs, True)
     util.RunOrderedCommandList([cmd_spTree], True)
     spTreeUnrootedFN = self.workingDir + "SpeciesTree_unrooted.txt"
     util.RenameTreeTaxa(spTreeFN_ids,
                         spTreeUnrootedFN,
                         self.ogSet.SpeciesDict(),
                         qFixNegatives=True)
     return spTreeFN_ids, spTreeUnrootedFN
Пример #6
0
def ReconciliationAndOrthologues(recon_method, ogSet, nParallel, iSpeciesTree=None, all_stride_dup_genes=None):
    """
    Write the node-labelled species tree, infer the orthologues with the
    requested method and write the orthologue statistics.

    recon_method - selects the method:
                   any name containing "dlcpar" runs DLCpar ("dlcpar_convergedsearch" sets qDeepSearch),
                   "phyldog" uses the Phyldog result trees,
                   anything else uses the OrthoFinder method ("only_overlap" sets qNoRecon)
    ogSet - info about the orthogroups, species etc
    nParallel - passed through to trees2ologs_dlcpar.RunDlcpar
    iSpeciesTree - which of the potential roots of the species tree is this
    all_stride_dup_genes - passed through to trees2ologs_of.DoOrthologuesForOrthoFinder
    """
    speciesTree_ids_fn = files.FileHandler.GetSpeciesTreeIDsRootedFN()
    labeled_tree_fn = files.FileHandler.GetSpeciesTreeResultsNodeLabelsFN()
    util.RenameTreeTaxa(speciesTree_ids_fn, labeled_tree_fn, ogSet.SpeciesDict(), qSupport=False, qFixNegatives=True, label='N')
    workingDir = files.FileHandler.GetWorkingDirectory_Write()    # workingDir - Orthologues working dir
    resultsDir_ologs = files.FileHandler.GetOrthologuesDirectory()
    reconTreesRenamedDir = files.FileHandler.GetOGsReconTreeDir(True)
    if "dlcpar" in recon_method:
        qDeepSearch = (recon_method == "dlcpar_convergedsearch")
        util.PrintTime("Starting DLCpar")
        dlcparResultsDir, dlcparLocusTreePat = trees2ologs_dlcpar.RunDlcpar(ogSet, speciesTree_ids_fn, workingDir, nParallel, qDeepSearch)
        util.PrintTime("Done DLCpar")
        # Write renamed copies of the locus trees produced by DLCpar
        spec_seq_dict = ogSet.Spec_SeqDict()
        for iog in xrange(len(ogSet.OGs())):
            util.RenameTreeTaxa(dlcparResultsDir + dlcparLocusTreePat % iog, files.FileHandler.GetOGsReconTreeFN(iog), spec_seq_dict, qSupport=False, qFixNegatives=False, inFormat=8, label='n')

        # Orthologue lists
        util.PrintUnderline("Inferring orthologues from gene trees" + (" (root %d)" % iSpeciesTree if iSpeciesTree is not None else ""))
        pickleDir = files.FileHandler.GetPickleDir()
        nOrthologues_SpPair = trees2ologs_dlcpar.create_orthologue_lists(ogSet, resultsDir_ologs, dlcparResultsDir, pickleDir)

    elif "phyldog" == recon_method:
        util.PrintTime("Starting Orthologues from Phyldog")
        nOrthologues_SpPair = trees2ologs_of.DoOrthologuesForOrthoFinder_Phyldog(ogSet, workingDir, trees2ologs_of.GeneToSpecies_dash, resultsDir_ologs, reconTreesRenamedDir)
        util.PrintTime("Done Orthologues from Phyldog")
    else:
        util.PrintTime("Starting OF Orthologues")
        qNoRecon = ("only_overlap" == recon_method)
        nOrthologues_SpPair = trees2ologs_of.DoOrthologuesForOrthoFinder(ogSet, speciesTree_ids_fn, trees2ologs_of.GeneToSpecies_dash, all_stride_dup_genes, qNoRecon)
        util.PrintTime("Done OF Orthologues")
    # Whatever the method, add the counts from TwoAndThreeGeneOrthogroups before writing the statistics
    nOrthologues_SpPair += TwoAndThreeGeneOrthogroups(ogSet, resultsDir_ologs)
    WriteOrthologuesStats(ogSet, nOrthologues_SpPair)
Пример #7
0
 def SpeciesTreeOnly(self):
     """
     Infer only the species tree and write a copy with the IDs renamed.

     With fewer than four species the tree file is written directly by
     WriteSpeciesTreeIDs_TwoThree; otherwise it is inferred from the
     orthogroup distance matrices.

     Returns:
     spTreeFN_ids - filename of the ID-labelled species tree
     """
     speciesToUse = self.ogSet.seqsInfo.speciesToUse
     if len(speciesToUse) >= 4:
         ogs, partialMatrices = self.GetOGMatrices_FullParallel()
         completedMatrices = self.CompleteOGMatrices(ogs, partialMatrices)
         D, spPairs = self.SpeciesTreeDistances(ogs, completedMatrices)
         cmd_spTree, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs, True)
         util.RunOrderedCommandList([cmd_spTree], True)
     else:
         spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
         WriteSpeciesTreeIDs_TwoThree(speciesToUse, spTreeFN_ids)
     renamedTreeFN = files.FileHandler.GetSpeciesTreeUnrootedFN(True)
     util.RenameTreeTaxa(spTreeFN_ids,
                         renamedTreeFN,
                         self.ogSet.SpeciesDict(),
                         qSupport=False,
                         qFixNegatives=True)
     return spTreeFN_ids
Пример #8
0
def DoOrthologuesForOrthoFinder(ogSet, treesIDsPatFn, species_tree_rooted_fn, GeneToSpecies, workingDir, output_dir, reconTreesRenamedDir, all_stride_dup_genes):
    """
    Infer orthologues from the gene trees and write them, the duplications and
    the suspect genes to file.

    ogSet - info about the orthogroups, species etc
    treesIDsPatFn - function called with the orthogroup index; its result is passed to GetOrthologues_for_tree
    species_tree_rooted_fn - filename of the rooted species tree
    GeneToSpecies - passed through to GetOrthologues_for_tree
    workingDir - not used by this function
    output_dir - directory in which the results directories are created
    reconTreesRenamedDir - where the renamed reconciled trees are written;
                           "Duplications.csv" is written to its parent directory
    all_stride_dup_genes - passed through to GetOrthologues_for_tree
    Returns:
    nOrthologues_SpPair - util.nOrtho_sp(nspecies) accumulated with the results of
                          AppendOrthologuesToFiles for every orthogroup
    """
    speciesDict = ogSet.SpeciesDict()
    SequenceDict = ogSet.SequenceDict()
    # Write directory and file structure
    speciesIDs = ogSet.speciesToUse
    nspecies = len(speciesIDs)
    speciesNames = [speciesDict[str(sp)] for sp in speciesIDs]
    dSuspect = output_dir + "Putative_Xenologues/"
    if not os.path.exists(dSuspect):
        os.mkdir(dSuspect)
    for index1, name1 in enumerate(speciesNames):
        with open(dSuspect + '%s.csv' % name1, 'wb') as outfile:
            writer1 = csv.writer(outfile, delimiter="\t")
            writer1.writerow(("Orthogroup", name1, "Other"))
        d = output_dir + "Orthologues_" + name1 + "/"
        if not os.path.exists(d):
            os.mkdir(d)
        for index2, name2 in enumerate(speciesNames):
            if index2 == index1:
                continue
            with open(d + '%s__v__%s.csv' % (name1, name2), 'wb') as outfile:
                writer1 = csv.writer(outfile, delimiter="\t")
                writer1.writerow(("Orthogroup", name1, name2))
    # Infer orthologues and write them to file
    species_tree_rooted = tree_lib.Tree(species_tree_rooted_fn)
    neighbours = GetSpeciesNeighbours(species_tree_rooted)
    # Label nodes of species tree
    species_tree_rooted.name = "N0"
    iNode = 1
    for n in species_tree_rooted.traverse():
        if (not n.is_leaf()) and (not n.is_root()):
            n.name = "N%d" % iNode
            iNode += 1
    nOgs = len(ogSet.OGs())
    nOrthologues_SpPair = util.nOrtho_sp(nspecies)
    # Loop invariants: the ID dictionary and the progress-report interval
    spec_seq_dict = ogSet.Spec_SeqDict()
    progressStep = 10 if nOgs <= 200 else 100 if nOgs <= 2000 else 1000
    with open(reconTreesRenamedDir + "../Duplications.csv", 'wb') as outfile:
        dupWriter = csv.writer(outfile, delimiter="\t")
        dupWriter.writerow(["Orthogroup", "Species Tree Node", "Gene Tree Node", "Support", "Type", "Genes 1", "Genes 2"])
        for iog in xrange(nOgs):
            orthologues, recon_tree, suspect_genes = GetOrthologues_for_tree(iog, treesIDsPatFn(iog), species_tree_rooted, GeneToSpecies, neighbours, dupsWriter=dupWriter, seqIDs=spec_seq_dict, spIDs=speciesDict, all_stride_dup_genes=all_stride_dup_genes)
            # Iterate over the species actually in use: these are the ones whose
            # "Orthologues_<species>/" directories were created above
            for spID, spName in zip(speciesIDs, speciesNames):
                strsp0_ = str(spID) + "_"
                these_genes = [g for g in suspect_genes if g.startswith(strsp0_)]
                if these_genes:
                    with open(output_dir + "Orthologues_" + spName + "/Putative_Horizontal_Gene_Transfer.txt", 'ab') as hgtfile:
                        # Write every suspect gene of this species (the original wrote only
                        # whichever gene the comprehension variable happened to leak)
                        hgtfile.write("\n".join([SequenceDict[g] for g in these_genes]) + "\n")
            allOrthologues = [(iog, orthologues)]
            util.RenameTreeTaxa(recon_tree, reconTreesRenamedDir + "OG%07d_tree.txt" % iog, spec_seq_dict, qSupport=False, qFixNegatives=True, label='n')
            if iog % progressStep == 0:
                util.PrintTime("Done %d of %d" % (iog, nOgs))
            nOrthologues_SpPair += AppendOrthologuesToFiles(allOrthologues, speciesDict, ogSet.speciesToUse, SequenceDict, output_dir, True)
    return nOrthologues_SpPair
Пример #9
0
 def DoTrees(self, ogs, idDict, nProcesses, qStopAfterSeqs, qStopAfterAlignments):
     """
     Write the orthogroup fasta files and then, unless told to stop earlier,
     infer the alignments and the gene trees and convert their IDs.

     ogs - the orthogroups
     idDict - dictionary used to replace the IDs in the alignments and trees
     nProcesses - passed to pc.RunParallelCommandsAndMoveResultsFile
     qStopAfterSeqs - return once the fasta files have been written
     qStopAfterAlignments - return once the alignments have been inferred
     Returns:
     list of results directories (all those created when stopping after the
     sequences, otherwise the first two)
     """
     # 0. Create the directories; only those for the second filename variant are reported
     resultsDirsFullPath = []
     for getFilename in [self.GetFastaFilename, self.GetAlignmentFilename, self.GetTreeFilename]:
         for qSecondVariant in (False, True):
             directory = os.path.dirname(getFilename(0, qSecondVariant))
             if not os.path.exists(directory):
                 os.mkdir(directory)
             if qSecondVariant:
                 resultsDirsFullPath.append(directory)
         if qStopAfterSeqs or (qStopAfterAlignments and getFilename == self.GetAlignmentFilename):
             break

     # 1. Sequences
     fastaWriter = FastaWriter(self.ogsWorkingDir)
     self.WriteFastaFiles(fastaWriter, ogs, idDict)
     if qStopAfterSeqs:
         return resultsDirsFullPath

     # 2. Heading
     if qStopAfterAlignments:
         heading = "Inferring multiple sequence alignments"
     else:
         heading = "Inferring multiple sequence alignments and gene trees"
     util.PrintUnderline(heading)

     # 3. Alignments only
     alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(ogs)
     if qStopAfterAlignments:
         pc.RunParallelCommandsAndMoveResultsFile(nProcesses, alignCommands_and_filenames, False)
         return resultsDirsFullPath[:2]

     # Otherwise, alignments and trees
     nAlignments = len(alignCommands_and_filenames)
     alignmentFilesToUse = [self.GetAlignmentFilename(i) for i in xrange(nAlignments)]
     treeCommands_and_filenames = self.GetTreeCommands(alignmentFilesToUse, ogs)
     nTrees = len(treeCommands_and_filenames)
     # Alignment + tree where a tree command exists, alignment alone for the rest
     commands_and_filenames = [[alignCommands_and_filenames[i], treeCommands_and_filenames[i]] for i in xrange(nTrees)]
     commands_and_filenames += [[alignCommands_and_filenames[i]] for i in xrange(nTrees, nAlignments)]
     pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)

     # Convert ids to accessions
     for i, alignFN in enumerate(alignmentFilesToUse):
         with open(alignFN, 'rb') as infile, open(self.GetAlignmentFilename(i, True), 'wb') as outfile:
             for line in infile:
                 if not line.startswith(">"):
                     outfile.write(line)
                     continue
                 outfile.write(">" + idDict[line[1:].rstrip()] + "\n")
         treeFN = self.GetTreeFilename(i)
         if os.path.exists(treeFN):
             util.RenameTreeTaxa(treeFN, self.GetTreeFilename(i, True), idDict, qFixNegatives=True)

     return resultsDirsFullPath[:2]
def RunDlcpar(ogSet, speciesTreeFN, workingDir, nParallel, qDeepSearch):
    """
    Run dlcpar_search on the gene tree of every orthogroup.

    Implementation:
    - (skip: label species tree)
    - sort out trees (RootGeneTreesArbitrarily)
    - run

    ogSet - info about the orthogroups, species etc
    speciesTreeFN - species tree filename given to dlcpar_search via -s
    workingDir - the 'dlcpar/' results directory is created inside it
    nParallel - passed to util.RunParallelOrderedCommandLists
    qDeepSearch - if True, the iteration limits are scaled with the orthogroup size
    Returns:
    (dlcparResultsDir, filename pattern of the locus trees)
    """
    orthogroups = ogSet.OGs()
    nOGs = len(orthogroups)
    dlcparResultsDir = workingDir + 'dlcpar/'
    if not os.path.exists(dlcparResultsDir):
        os.mkdir(dlcparResultsDir)
    RootGeneTreesArbitrarily(nOGs, dlcparResultsDir)

    # Write renamed copies of the gene trees
    spec_seq_dict = ogSet.Spec_SeqDict()
    for iog in xrange(nOGs):
        treeFN_ids = files.FileHandler.GetOGsTreeFN(iog)
        treeFN_renamed = files.FileHandler.GetOGsTreeFN(iog, True)
        util.RenameTreeTaxa(treeFN_ids, treeFN_renamed, spec_seq_dict, qSupport=False, qFixNegatives=True, qViaCopy=False)

    geneMapFN = WriteGeneSpeciesMap(dlcparResultsDir, ogSet.SpeciesDict())
    # Same base names as the gene tree files, but located in the dlcpar directory
    filenames = []
    for iog in xrange(nOGs):
        filenames.append(dlcparResultsDir + os.path.basename(files.FileHandler.GetOGsTreeFN(iog)))

    commandLists = []
    if qDeepSearch:
        for fn, og in zip(filenames, orthogroups):
            nTaxa = len(og)
            if nTaxa < 25:
                nIter, nNoImprov = 1000, 100
            elif nTaxa < 200:
                nIter, nNoImprov = 25000, 1000
            else:
                nIter, nNoImprov = 50000, 2000
            cmd = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -i %d --nprescreen 100 --nconverge %d' % (speciesTreeFN, geneMapFN, fn, nIter, nNoImprov)
            commandLists.append([cmd])
    else:
        for fn in filenames:
            cmd = 'dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -I .txt -x 1' % (speciesTreeFN, geneMapFN, fn)
            commandLists.append([cmd])
    util.RunParallelOrderedCommandLists(nParallel, commandLists)
    return dlcparResultsDir, "OG%07d_tree_id.dlcpar.locus.tree"
Пример #11
0
    def RunAnalysis(self):
        """
        Infer the species tree and the gene trees from the orthogroup distance matrices.

        Renamed copies of the gene trees are written; the species tree is left
        with its IDs (it is not renamed here).

        Returns:
        (number of orthogroups, D, spPairs, species tree filename (IDs))
        """
        ogs, ogMatrices_partial = self.GetOGMatrices()
        ogMatrices = self.WriteOGMatrices(ogs, ogMatrices_partial)

        D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
        cmd_spTree, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
        # Species tree command first, followed by the gene tree commands
        allCommands = [[cmd_spTree]] + self.PrepareGeneTreeCommand()
        print("\n3. Inferring gene and species trees")
        print("-----------------------------------")
        util.RunParallelOrderedCommandLists(self.nProcesses, allCommands, qHideStdout=True)

        seqDict = self.ogSet.Spec_SeqDict()
        nOGs = len(self.ogSet.OGs())
        for iog in xrange(nOGs):
            util.RenameTreeTaxa(self.treesPatIDs % iog, self.treesPat % iog, seqDict, qFixNegatives=True)
        return len(ogs), D, spPairs, spTreeFN_ids
Пример #12
0
def DoOrthologuesForOrthoFinder(ogSet, treesIDsPatFn, species_tree_rooted_fn, GeneToSpecies, workingDir, output_dir, reconTreesRenamedDir, all_stride_dup_genes):
    """
    Infer orthologues from the gene trees and write them and the duplications to file.

    ogSet - info about the orthogroups, species etc
    treesIDsPatFn - function called with the orthogroup index; its result is passed to GetOrthologues_for_tree
    species_tree_rooted_fn - filename of the rooted species tree
    GeneToSpecies - passed through to GetOrthologues_for_tree
    workingDir - not used by this function
    output_dir - directory in which the "Orthologues_<species>/" directories are created
    reconTreesRenamedDir - where the renamed reconciled trees are written;
                           "Duplications.csv" is written to its parent directory
    all_stride_dup_genes - passed through to GetOrthologues_for_tree
    Returns:
    nOrthologues_SpPair - util.nOrtho_sp(nspecies) plus the result of AppendOrthologuesToFiles
    """
    speciesDict = ogSet.SpeciesDict()
    SequenceDict = ogSet.SequenceDict()
    # Write directory and file structure: one directory per species containing
    # one header-only file for each other species
    speciesIDs = ogSet.speciesToUse
    nspecies = len(speciesIDs)
    speciesNames = [speciesDict[str(sp)] for sp in speciesIDs]
    for index1, name1 in enumerate(speciesNames):
        d = output_dir + "Orthologues_" + name1 + "/"
        if not os.path.exists(d):
            os.mkdir(d)
        for index2, name2 in enumerate(speciesNames):
            if index2 == index1:
                continue
            with open(d + '%s__v__%s.csv' % (name1, name2), 'wb') as outfile:
                writer1 = csv.writer(outfile, delimiter="\t")
                writer1.writerow(("Orthogroup", name1, name2))
    # Infer orthologues and write them to file
    species_tree_rooted = tree_lib.Tree(species_tree_rooted_fn)
    # Label nodes of species tree
    species_tree_rooted.name = "N0"
    iNode = 1
    for n in species_tree_rooted.traverse():
        if (not n.is_leaf()) and (not n.is_root()):
            n.name = "N%d" % iNode
            iNode += 1
    nOgs = len(ogSet.OGs())
    nOrthologues_SpPair = util.nOrtho_sp(nspecies)
    allOrthologues = []
    # Loop invariants: the ID dictionary and the progress-report interval
    spec_seq_dict = ogSet.Spec_SeqDict()
    progressStep = 10 if nOgs <= 200 else 100 if nOgs <= 2000 else 1000
    with open(reconTreesRenamedDir + "../Duplications.csv", 'wb') as outfile:
        dupWriter = csv.writer(outfile, delimiter="\t")
        dupWriter.writerow(["Orthogroup", "Species Tree Node", "Gene Tree Node", "Support", "Type", "Genes 1", "Genes 2"])
        for iog in xrange(nOgs):
            orthologues, recon_tree = GetOrthologues_for_tree(iog, treesIDsPatFn(iog), species_tree_rooted, GeneToSpecies, dupsWriter=dupWriter, seqIDs=spec_seq_dict, spIDs=speciesDict, all_stride_dup_genes=all_stride_dup_genes)
            allOrthologues.append((iog, orthologues))
            util.RenameTreeTaxa(recon_tree, reconTreesRenamedDir + "OG%07d_tree.txt" % iog, spec_seq_dict, qFixNegatives=True, label='n')
            if iog % progressStep == 0:
                util.PrintTime("Done %d of %d" % (iog, nOgs))
    # All orthogroups are written in a single call once every tree has been processed
    nOrthologues_SpPair += AppendOrthologuesToFiles(allOrthologues, speciesDict, ogSet.speciesToUse, SequenceDict, output_dir)
    return nOrthologues_SpPair
Пример #13
0
def GetOrthologues(orthofinderWorkingDir, orthofinderResultsDir, speciesToUse,
                   nSpAll, clustersFilename_pairs, nProcesses):
    """
    Run the orthologue inference pipeline: gene and species trees, rooting of
    the species tree, reconciliation with DLCpar and the orthologue lists.

    orthofinderWorkingDir - passed to OrthoGroupsSet and to CanRunDependencies
    orthofinderResultsDir - a new "Orthologues_" results directory is created from this prefix
    speciesToUse, nSpAll, clustersFilename_pairs - passed to OrthoGroupsSet
    nProcesses - passed to DendroBLASTTrees and rfd.GetRoot

    Calls util.Fail() if fewer than four species are in use, and sys.exit() if
    CanRunDependencies reports that the dependencies are not met.

    Returns:
    the result of GetResultsFilesString for the list of rooted species tree filenames
    """
    ogSet = OrthoGroupsSet(orthofinderWorkingDir,
                           speciesToUse,
                           nSpAll,
                           clustersFilename_pairs,
                           idExtractor=util.FirstWordExtractor)
    if len(ogSet.speciesToUse) < 4:
        print("ERROR: Not enough species to infer species tree")
        util.Fail()

    print("\n1. Checking required programs are installed")
    print("-------------------------------------------")
    if not CanRunDependencies(orthofinderWorkingDir):
        print(
            "Orthogroups have been inferred but the dependencies for inferring gene trees and\northologues have not been met. Please review previous messages for more information."
        )
        sys.exit()

    print("\n2. Calculating gene distances")
    print("-----------------------------")
    resultsDir = util.CreateNewWorkingDirectory(orthofinderResultsDir +
                                                "Orthologues_")

    # Step "3." is printed from inside db.RunAnalysis()
    db = DendroBLASTTrees(ogSet, resultsDir, nProcesses)
    db.ReadAndPickle()
    nOGs, D, spPairs, spTreeFN_ids = db.RunAnalysis()

    print("\n4. Best outgroup(s) for species tree")
    print("------------------------------------")
    spDict = ogSet.SpeciesDict()
    # The directory holding the ID-labelled gene trees is taken from the tree filename pattern
    roots, clusters, rootedSpeciesTreeFN, nSupport = rfd.GetRoot(
        spTreeFN_ids,
        os.path.split(db.treesPatIDs)[0] + "/",
        rfd.GeneToSpecies_dash,
        nProcesses,
        treeFmt=1)
    # Same report in both branches, worded for several roots or for a single root
    if len(roots) > 1:
        print(
            "Observed %d duplications. %d support the best roots and %d contradict them."
            % (len(clusters), nSupport, len(clusters) - nSupport))
        print("Best outgroups for species tree:")
    else:
        print(
            "Observed %d duplications. %d support the best root and %d contradict it."
            % (len(clusters), nSupport, len(clusters) - nSupport))
        print("Best outgroup for species tree:")
    for r in roots:
        print("  " + (", ".join([spDict[s] for s in r])))

    qMultiple = len(roots) > 1
    if qMultiple:
        print("\nAnalysing each of the potential species tree roots.")
    resultsSpeciesTrees = []
    # One full reconciliation + orthologue inference per candidate root
    for i, (r, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
        # With several roots, each one gets its own numbered output directories
        if qMultiple:
            resultsDir_new = resultsDir + "Orthologues_using_outgroup_%d/" % i
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees_using_outgroup_%d/" % i
            resultsSpeciesTrees.append(
                resultsDir_new + "SpeciesTree_rooted_at_outgroup_%d.txt" % i)
        else:
            resultsDir_new = resultsDir + "Orthologues/"
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees/"
            resultsSpeciesTrees.append(resultsDir + "SpeciesTree_rooted.txt")
        os.mkdir(resultsDir_new)
        # Write the rooted species tree with the species IDs renamed
        util.RenameTreeTaxa(speciesTree_fn,
                            resultsSpeciesTrees[-1],
                            db.ogSet.SpeciesDict(),
                            qFixNegatives=True)

        print("\n5%s. Reconciling gene and species trees" %
              ("-%d" % i if qMultiple else ""))
        print("-------------------------------------" +
              ("--" if qMultiple else ""))
        print("Outgroup: " + (", ".join([spDict[s] for s in r])))
        dlcparResultsDir = RunDlcpar(db.treesPatIDs, ogSet, nOGs,
                                     speciesTree_fn, db.workingDir)
        os.mkdir(reconTreesRenamedDir)
        # Write renamed copies of the locus trees produced by DLCpar
        for iog in xrange(len(db.ogSet.OGs())):
            util.RenameTreeTaxa(dlcparResultsDir +
                                "OG%07d_tree_id.locus.tree" % iog,
                                reconTreesRenamedDir + "OG%07d_tree.txt" % iog,
                                db.ogSet.Spec_SeqDict(),
                                qFixNegatives=False,
                                inFormat=8)

        # Orthologue lists
        print("\n6%s. Inferring orthologues from gene trees" %
              ("-%d" % i if qMultiple else ""))
        print("----------------------------------------" +
              ("--" if qMultiple else ""))
        pt.get_orthologue_lists(ogSet, resultsDir_new, dlcparResultsDir,
                                db.workingDir)

    CleanWorkingDir(db)
    print("\n7. Writing results files")
    print("------------------------")

    return GetResultsFilesString(resultsSpeciesTrees)
Пример #14
0
    def DoTrees(self, ogs, ogMatrix, idDict, speciesIdDict, speciesToUse, nProcesses, qStopAfterSeqs, qStopAfterAlignments, qDoSpeciesTree):
        """
        Write the orthogroup fasta files and then, unless told to stop earlier,
        infer the alignments, the gene trees and (optionally) the species tree
        from a concatenated alignment, converting the IDs in the results.

        ogs - the orthogroups
        ogMatrix - passed to DetermineOrthogroupsForSpeciesTree
        idDict - dictionary used to replace IDs; NOTE: it is updated in place with speciesIdDict
        speciesIdDict - species ID dictionary merged into idDict
        speciesToUse - passed to FastaWriter
        nProcesses - passed to pc.RunParallelCommandsAndMoveResultsFile
        qStopAfterSeqs - return once the fasta files have been written
        qStopAfterAlignments - return once the alignments have been inferred
        qDoSpeciesTree - also build the concatenated alignment and the species tree
        Returns:
        list of results directories (all three when stopping after the
        sequences, otherwise the first two)
        """
        idDict.update(speciesIdDict) # same code will then also convert concatenated alignment for species tree
        # 0       
        resultsDirsFullPath = [files.FileHandler.GetResultsSeqsDir(), files.FileHandler.GetResultsAlignDir(), files.FileHandler.GetResultsTreesDir()]
        
        # 1.
        fastaWriter = FastaWriter(files.FileHandler.GetSpeciesSeqsDir(), speciesToUse)
        self.WriteFastaFiles(fastaWriter, ogs, idDict, True)
        if qStopAfterSeqs: return resultsDirsFullPath

        # 3
        # Get OGs to use for species tree
        if qDoSpeciesTree:
            iOgsForSpeciesTree, fSingleCopy = DetermineOrthogroupsForSpeciesTree(ogMatrix)            
            concatenated_algn_fn = files.FileHandler.GetSpeciesTreeConcatAlignFN()
        else:
            iOgsForSpeciesTree = []
        alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(ogs)
        if qStopAfterAlignments:
            util.PrintUnderline("Inferring multiple sequence alignments")
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, alignCommands_and_filenames, False)
            if qDoSpeciesTree:
                CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
                # write OGs used to file
                dSpeciesTree = os.path.split(files.FileHandler.GetSpeciesTreeResultsFN(0, True))[0] + "/"
                with open(dSpeciesTree + "Orthogroups_for_concatenated_alignment.txt", 'wb') as outfile:
                    for iog in iOgsForSpeciesTree: outfile.write("OG%07d\n" % iog)
            # ids -> accessions
            alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]        
            accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
            if qDoSpeciesTree: 
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
            self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
            return resultsDirsFullPath[:2]
        
        # Otherwise, alignments and trees
        # Strategy is
        # 1. Do alignments (and trees) required for species tree
        # 2. Create concatenated alignment
        # 3. Create second list of commands [speciestree] + [remaining alignments and trees]
        alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]
        treeCommands_and_filenames = self.GetTreeCommands(alignmentFilesToUse, ogs)
        commands_and_filenames = []
        if qDoSpeciesTree:
            print("Species tree: Using %d orthogroups with minimum of %0.1f%% of species having single-copy genes in any orthogroup" % (len(iOgsForSpeciesTree), 100.*fSingleCopy))
            util.PrintUnderline("Inferring multiple sequence alignments for species tree") 
            # Do required alignments and trees
            speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
            for i in iOgsForSpeciesTree:
                commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
            CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
            # write OGs used to file
            dSpeciesTree = os.path.split(files.FileHandler.GetSpeciesTreeResultsFN(0, True))[0] + "/"
            with open(dSpeciesTree + "Orthogroups_for_concatenated_alignment.txt", 'wb') as outfile:
                for iog in iOgsForSpeciesTree: outfile.write("OG%07d\n" % iog)
            # Add species tree to list of commands to run (the list is restarted: the
            # commands queued above have already been run)
            commands_and_filenames = [self.program_caller.GetTreeCommands(self.tree_program, [concatenated_algn_fn], [speciesTreeFN_ids], ["SpeciesTree"])]
            util.PrintUnderline("Inferring remaining multiple sequence alignments and gene trees") 
        else:
            util.PrintUnderline("Inferring multiple sequence alignments and gene trees") 

        # Now continue as before, skipping the orthogroups already done for the species tree
        iOgsForSpeciesTree = set(iOgsForSpeciesTree)                         
        for i in xrange(len(treeCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
        for i in xrange(len(treeCommands_and_filenames), len(alignCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i]])
        pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
        
        # Convert ids to accessions
        accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
        # Add concatenated Alignment
        if qDoSpeciesTree:
            alignmentFilesToUse.append(concatenated_algn_fn)
            accessionAlignmentFNs.append(files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
            # Only inspect the species tree once it is known to exist, so that a
            # failed inference reaches the intended error report below
            if os.path.exists(speciesTreeFN_ids):
                qHaveSupport = util.HaveSupportValues(speciesTreeFN_ids)
                util.RenameTreeTaxa(speciesTreeFN_ids, files.FileHandler.GetSpeciesTreeUnrootedFN(True), idDict, qSupport=qHaveSupport, qFixNegatives=True)
            else:
                text = "ERROR: Species tree inference failed"
                files.FileHandler.LogFailAndExit(text)
        self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
        # Whether the gene trees have support values is determined from the first tree found
        qHaveSupport = None
        for i in xrange(len(treeCommands_and_filenames)):
            infn = self.GetTreeFilename(i)
            if os.path.exists(infn):
                if qHaveSupport is None: qHaveSupport = util.HaveSupportValues(infn)
                util.RenameTreeTaxa(infn, self.GetTreeFilename(i, True), idDict, qSupport=qHaveSupport, qFixNegatives=True)       
        return resultsDirsFullPath[:2]
Пример #15
0
def DoOrthologuesForOrthoFinder(ogSet, species_tree_rooted_fn, GeneToSpecies,
                                all_stride_dup_genes, qNoRecon):
    """
    Infer orthologues for every orthogroup and append them to the results files.

    Steps, in order:
      1. Create an "Orthologues_<species>/" directory per species and write a
         header-only "<sp1>__v__<sp2>.tsv" table for each ordered species pair.
      2. Load the rooted species tree and name its nodes: the root is "N0" and
         every other internal node gets "N1", "N2", ... in traversal order.
      3. For each orthogroup: root the gene tree, write it with accessions,
         call GetOrthologues_from_tree, record any suspect genes, write the
         reconciled tree and append the orthologues to the per-pair tables.

    Args:
        ogSet: supplies SpeciesDict(), SequenceDict(), Spec_SeqDict(), OGs()
            and the speciesToUse attribute.
        species_tree_rooted_fn: file name handed to tree_lib.Tree.
        GeneToSpecies: forwarded to CheckAndRootTree and GetOrthologues_from_tree.
        all_stride_dup_genes: forwarded to GetOrthologues_from_tree.
        qNoRecon: forwarded to GetOrthologues_from_tree.

    Returns:
        The object created by util.nOrtho_sp(nspecies) after the result of
        AppendOrthologuesToFiles has been added to it for each processed
        orthogroup.
    """
    species_names = ogSet.SpeciesDict()
    sequence_names = ogSet.SequenceDict()
    species_ids = ogSet.speciesToUse
    n_species = len(species_ids)
    orthologues_dir = files.FileHandler.GetOrthologuesDirectory()

    # One directory per species, holding a header-only table per other species
    for pos_a, id_a in enumerate(species_ids):
        name_a = species_names[str(id_a)]
        pair_dir = orthologues_dir + "Orthologues_" + name_a + "/"
        if not os.path.exists(pair_dir):
            os.mkdir(pair_dir)
        for pos_b, id_b in enumerate(species_ids):
            if pos_b == pos_a:
                continue
            name_b = species_names[str(id_b)]
            pair_fn = pair_dir + '%s__v__%s.tsv' % (name_a, name_b)
            with open(pair_fn, 'wb') as pair_file:
                pair_writer = csv.writer(pair_file, delimiter="\t")
                pair_writer.writerow(("Orthogroup", name_a, name_b))

    # Load the species tree and label its nodes
    species_tree_rooted = tree_lib.Tree(species_tree_rooted_fn)
    neighbours = GetSpeciesNeighbours(species_tree_rooted)
    species_tree_rooted.name = "N0"
    next_label = 1
    for node in species_tree_rooted.traverse():
        if node.is_leaf() or node.is_root():
            continue
        node.name = "N%d" % next_label
        next_label += 1

    n_ogs = len(ogSet.OGs())
    ortho_counts = util.nOrtho_sp(n_species)
    species_keys = species_names.keys()
    recon_trees_dir = files.FileHandler.GetOGsReconTreeDir(True)
    spec_seq_dict = ogSet.Spec_SeqDict()

    # Progress is reported every `progress_step` orthogroups
    if n_ogs <= 200:
        progress_step = 10
    elif n_ogs <= 2000:
        progress_step = 100
    else:
        progress_step = 1000

    # The suspect-gene output files are only set up once a suspect gene is seen
    suspect_dirs_ready = False
    with open(files.FileHandler.GetDuplicationsFN(), 'wb') as dups_file:
        dups_writer = csv.writer(dups_file, delimiter="\t")
        dups_writer.writerow([
            "Orthogroup", "Species Tree Node", "Gene Tree Node", "Support",
            "Type", "Genes 1", "Genes 2"
        ])
        for iog in xrange(n_ogs):
            # this can be parallelised easily
            rooted_tree_ids, qHaveSupport = CheckAndRootTree(
                files.FileHandler.GetOGsTreeFN(iog), species_tree_rooted,
                GeneToSpecies)
            if rooted_tree_ids is None:
                continue

            # Rooted gene tree, written with accessions
            util.RenameTreeTaxa(rooted_tree_ids,
                                files.FileHandler.GetOGsTreeFN(iog, True),
                                spec_seq_dict,
                                qSupport=qHaveSupport,
                                qFixNegatives=True,
                                qViaCopy=True)
            orthologues, recon_tree, suspect_genes = GetOrthologues_from_tree(
                iog,
                rooted_tree_ids,
                species_tree_rooted,
                GeneToSpecies,
                neighbours,
                dupsWriter=dups_writer,
                seqIDs=spec_seq_dict,
                spIDs=ogSet.SpeciesDict(),
                all_stride_dup_genes=all_stride_dup_genes,
                qNoRecon=qNoRecon)

            has_suspects = len(suspect_genes) > 0
            if has_suspects and not suspect_dirs_ready:
                suspect_dirs_ready = True
                suspect_genes_dir = files.FileHandler.GetSuspectGenesDir()
                xenologs_dir = files.FileHandler.GetPutativeXenelogsDir()
                for sp_id in species_ids:
                    sp_name = species_names[str(sp_id)]
                    with open(xenologs_dir + '%s.tsv' % sp_name,
                              'wb') as xeno_file:
                        xeno_writer = csv.writer(xeno_file, delimiter="\t")
                        xeno_writer.writerow(("Orthogroup", sp_name, "Other"))

            # Append this orthogroup's suspect genes to the per-species lists
            for key_pos in xrange(n_species):
                sp_key = species_keys[key_pos]
                gene_prefix = sp_key + "_"
                matching = [
                    g for g in suspect_genes if g.startswith(gene_prefix)
                ]
                if matching:
                    suspects_fn = suspect_genes_dir + species_names[sp_key] + ".txt"
                    with open(suspects_fn, 'ab') as suspects_file:
                        suspects_file.write(
                            "\n".join([sequence_names[g]
                                       for g in matching]) + "\n")

            og_orthologues = [(iog, orthologues)]
            # don't relabel nodes, they've already been done
            util.RenameTreeTaxa(recon_tree,
                                recon_trees_dir + "OG%07d_tree.txt" % iog,
                                spec_seq_dict,
                                qSupport=False,
                                qFixNegatives=True)
            if iog % progress_step == 0:
                util.PrintTime("Done %d of %d" % (iog, n_ogs))
            ortho_counts += AppendOrthologuesToFiles(
                og_orthologues, species_names, ogSet.speciesToUse,
                sequence_names, orthologues_dir, has_suspects)
    return ortho_counts
Пример #16
0
    def DoTrees(self, ogs, ogMatrix, idDict, speciesIdDict, nProcesses, qStopAfterSeqs, qStopAfterAlignments, qDoSpeciesTree):
        """
        Write orthogroup FASTA files and, unless stopped early, infer the
        alignments and gene trees (plus an optional species tree).

        Args:
            ogs: orthogroups, passed to the FASTA writer and to the alignment
                and tree command builders.
            ogMatrix: passed to DetermineOrthogroupsForSpeciesTree when
                qDoSpeciesTree is set.
            idDict: ID -> accession mapping used for renaming. NOTE: it is
                updated in place with the entries of speciesIdDict.
            speciesIdDict: species mapping merged into idDict so the
                concatenated species-tree alignment can be renamed too.
            nProcesses: passed to pc.RunParallelCommandsAndMoveResultsFile.
            qStopAfterSeqs: return once the FASTA files have been written.
            qStopAfterAlignments: return once the alignments are done (no
                gene trees are inferred).
            qDoSpeciesTree: also build a concatenated alignment and, when
                trees are inferred, a species tree from it.

        Returns:
            The results directories created so far: only the first one when
            qStopAfterSeqs is set, otherwise the first two.
        """
        idDict.update(speciesIdDict) # same code will then also convert concatenated alignment for species tree
        # 0. Create the working (ids) and results (accessions) directories
        resultsDirsFullPath = []
        for fn in [self.GetFastaFilename, self.GetAlignmentFilename, self.GetTreeFilename]:
            for qIDs in [True, False]:
                d = os.path.split(fn(0, not qIDs))[0]
                if not os.path.exists(d): os.mkdir(d)
                if not qIDs: resultsDirsFullPath.append(d)
            if qStopAfterSeqs: break
            if qStopAfterAlignments and fn == self.GetAlignmentFilename: break

        # 1. Sequences
        fastaWriter = FastaWriter(self.ogsWorkingDir)
        self.WriteFastaFiles(fastaWriter, ogs, idDict)
        if qStopAfterSeqs: return resultsDirsFullPath

        # 3. Get OGs to use for species tree
        if qDoSpeciesTree:
            iOgsForSpeciesTree, fSingleCopy = DetermineOrthogroupsForSpeciesTree(ogMatrix)
            concatenated_algn_fn = os.path.split(self.GetAlignmentFilename(0))[0] + "/SpeciesTreeAlignment.fa"
        else:
            iOgsForSpeciesTree = []
        alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(ogs)
        if qStopAfterAlignments:
            util.PrintUnderline("Inferring multiple sequence alignments")
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, alignCommands_and_filenames, False)
            # The concatenated alignment only exists when a species tree was
            # requested; its filename and fSingleCopy are undefined otherwise.
            if qDoSpeciesTree:
                CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
            # ids -> accessions
            alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]
            accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
            if qDoSpeciesTree:
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(os.path.split(self.GetAlignmentFilename(0, True))[0] + "/SpeciesTreeAlignment.fa")
            self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
            return resultsDirsFullPath[:2]

        # Otherwise, alignments and trees
        # Strategy is
        # 1. Do alignments (and trees) required for species tree
        # 2. Create concatenated alignment
        # 3. Create second list of commands [speciestree] + [remaining alignments and trees]
        alignmentFilesToUse = [self.GetAlignmentFilename(i) for i, _ in enumerate(alignCommands_and_filenames)]
        treeCommands_and_filenames = self.GetTreeCommands(alignmentFilesToUse, ogs)
        commands_and_filenames = []
        if qDoSpeciesTree:
            print("Species tree: Using %d orthogroups with minimum of %0.1f%% of species having single-copy genes in any orthogroup" % (len(iOgsForSpeciesTree), 100.*fSingleCopy))
            util.PrintUnderline("Inferring multiple sequence alignments for species tree")
            # Do required alignments and trees.
            # Only the directory part is needed, so use index 0 explicitly
            # rather than a loop variable leaked from an earlier comprehension.
            speciesTreeFN_ids = os.path.split(self.GetTreeFilename(0))[0] + "/SpeciesTree_unrooted.txt"
            for i in iOgsForSpeciesTree:
                commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)
            CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs, self.GetAlignmentFilename, concatenated_algn_fn, fSingleCopy)
            # Add species tree to list of commands to run
            commands_and_filenames = [self.program_caller.GetTreeCommands(self.tree_program, [concatenated_algn_fn], [speciesTreeFN_ids], ["SpeciesTree"])]
            util.PrintUnderline("Inferring remaining multiple sequence alignments and gene trees")
        else:
            util.PrintUnderline("Inferring multiple sequence alignments and gene trees")

        # Now continue as before: skip the orthogroups already processed for the species tree
        iOgsForSpeciesTree = set(iOgsForSpeciesTree)
        for i in xrange(len(treeCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i], treeCommands_and_filenames[i]])
        # Alignments that have no matching tree command are still aligned
        for i in xrange(len(treeCommands_and_filenames), len(alignCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i]])
        pc.RunParallelCommandsAndMoveResultsFile(nProcesses, commands_and_filenames, True)

        # Convert ids to accessions
        accessionAlignmentFNs = [self.GetAlignmentFilename(i, True) for i in xrange(len(alignmentFilesToUse))]
        # Add concatenated Alignment
        if qDoSpeciesTree:
            alignmentFilesToUse.append(concatenated_algn_fn)
            accessionAlignmentFNs.append(os.path.split(self.GetAlignmentFilename(0, True))[0] + "/SpeciesTreeAlignment.fa")
        # Alignments are renamed whether or not a species tree was requested
        self.RenameAlignmentTaxa(alignmentFilesToUse, accessionAlignmentFNs, idDict)
        if qDoSpeciesTree:
            if os.path.exists(speciesTreeFN_ids):
                util.RenameTreeTaxa(speciesTreeFN_ids, self.workingDir + "SpeciesTree_unrooted.txt", idDict, qFixNegatives=True)
            else:
                print("ERROR: Species tree inference failed")
                util.Fail()
        for i in xrange(len(treeCommands_and_filenames)):
            if os.path.exists(self.GetTreeFilename(i)):
                util.RenameTreeTaxa(self.GetTreeFilename(i), self.GetTreeFilename(i, True), idDict, qFixNegatives=True)
        return resultsDirsFullPath[:2]
Пример #17
0
def OrthologuesWorkflow(workingDir_ogs, 
                       orthofinderResultsDir, 
                       speciesToUse, nSpAll, 
                       clustersFilename_pairs, 
                       tree_options,
                       msa_method,
                       tree_method,
                       nHighParallel,
                       nLowParrallel,
                       userSpeciesTree = None, 
                       qStopAfterSeqs = False,
                       qStopAfterAlign = False,
                       qStopAfterTrees = False, 
                       qMSA = False,
                       qPhyldog = False,
                       pickleDir=None):
    """
    Run the gene tree / species tree / orthologue inference pipeline.

    Stages:
      1. Gene trees, either from multiple sequence alignments (qMSA or
         qPhyldog) or from DendroBLAST distance matrices; a species tree is
         inferred as well unless userSpeciesTree is given.
      2. Abort if fewer than four species are in use.
      3. Obtain rooted species tree(s): convert the user tree, or find the
         best root(s) with rfd.GetRoot.
      4. Reconcile gene trees with each rooted species tree and infer
         orthologues.
      5. Clean up the working directory.

    Returns:
        A string describing where the results were written. The function
        returns early (with a shorter summary) when qStopAfterSeqs,
        qStopAfterAlign, qStopAfterTrees or qPhyldog applies.
    """
    ogSet = OrthoGroupsSet(workingDir_ogs, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor, pickleDir=pickleDir)

    # NOTE: a dependency check (CanRunOrthologueDependencies) used to run here
    # and is currently disabled.

    resultsDir = util.CreateNewWorkingDirectory(orthofinderResultsDir + "Orthologues_")

    # === 1 ===  (ust = user species tree)
    # MSA:               Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Phyldog:           Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Dendroblast:                                  DistanceMatrices    GeneTrees    db    SpeciesTree
    # MSA (ust):         Sequences    Alignments                        GeneTrees    db
    # Phyldog (ust):     Sequences    Alignments                        GeneTrees    db
    # Dendroblast (ust):                            DistanceMatrices    GeneTrees    db
    if qMSA or qPhyldog:
        tree_builder = msa.TreesForOrthogroups(tree_options, msa_method, tree_method, resultsDir, workingDir_ogs)
        seqs_alignments_dirs = tree_builder.DoTrees(ogSet.OGs(qInclAll=True), ogSet.Spec_SeqDict(), nHighParallel, qStopAfterSeqs, qStopAfterAlign or qPhyldog)
        if qStopAfterSeqs:
            print("")
            return ("\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0])
        if qStopAfterAlign:
            print("")
            summary = "\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0]
            summary += "\nMultiple sequence alignments:\n   %s\n" % seqs_alignments_dirs[1]
            return summary
        db = DendroBLASTTrees(ogSet, resultsDir, nLowParrallel)
        if not userSpeciesTree:
            util.PrintUnderline("Inferring species tree (calculating gene distances)")
            print("Loading BLAST scores")
            db.ReadAndPickle()
            spTreeFN_ids, _ = db.SpeciesTreeOnly()
        if qPhyldog:
            trees_from_phyldog.RunPhyldogAnalysis(resultsDir + "WorkingDirectory/phyldog/", ogSet.OGs(), speciesToUse)
            return "Running Phyldog" + "\n".join(seqs_alignments_dirs)
    else:
        util.PrintUnderline("Calculating gene distances")
        db = DendroBLASTTrees(ogSet, resultsDir, nLowParrallel)
        db.ReadAndPickle()
        # Only the species tree (ids) file name is needed from the four results
        _, _, spTreeFN_ids, _ = db.RunAnalysis()

    # === 2 ===  Check can continue with analysis
    if len(ogSet.speciesToUse) < 4:
        print("ERROR: Not enough species to infer species tree")
        util.Fail()

    # === 3 ===
    # MSA / Phyldog / Dendroblast:                   RootSpeciesTree
    # MSA (ust) / Phyldog (ust) / Dendroblast (ust): ConvertSpeciesTreeIDs
    if userSpeciesTree:
        util.PrintUnderline("Using user-supplied species tree")
        userSpeciesTree = ConvertUserSpeciesTree(db.workingDir + "Trees_ids/", userSpeciesTree, ogSet.SpeciesDict())
        rootedSpeciesTreeFN = [userSpeciesTree]
        roots = [None]
        qMultiple = False
    else:
        util.PrintUnderline("Best outgroup(s) for species tree")
        spDict = ogSet.SpeciesDict()
        gene_trees_dir = os.path.split(db.TreeFilename_IDs(0))[0] + "/"
        roots, clusters, rootedSpeciesTreeFN, nSupport = rfd.GetRoot(spTreeFN_ids, gene_trees_dir, rfd.GeneToSpecies_dash, nHighParallel, treeFmt = 1)
        dup_counts = (len(clusters), nSupport, len(clusters) - nSupport)
        if len(roots) > 1:
            print("Observed %d duplications. %d support the best roots and %d contradict them." % dup_counts)
            print("Best outgroups for species tree:")
        else:
            print("Observed %d duplications. %d support the best root and %d contradict it." % dup_counts)
            print("Best outgroup for species tree:")
        for outgroup in roots:
            print("  " + ", ".join([spDict[s] for s in outgroup]))
        qMultiple = len(roots) > 1

    if qStopAfterTrees:
        if userSpeciesTree:
            summary = ""
            if qMSA:
                summary += "\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0]
                summary += "\nMultiple sequence alignments:\n   %s\n" % seqs_alignments_dirs[1]
            summary += "\nGene trees:\n   %s\n" % (resultsDir + "Gene_Trees/")
            return summary
        # otherwise, write out the rooted species tree(s)
        resultsSpeciesTrees = []
        for i, (_, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            if len(roots) == 1:
                rooted_name = "SpeciesTree_rooted.txt"
            else:
                rooted_name = "SpeciesTree_rooted_at_outgroup_%d.txt" % i
            resultsSpeciesTrees.append(resultsDir + rooted_name)
            util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qFixNegatives=True)
        db.DeleteBlastMatrices()
        CleanWorkingDir(db.workingDir)
        return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None, False)

    # === 4 ===  Reconciliation / orthologues, once per candidate root
    if qMultiple:
        util.PrintUnderline("\nAnalysing each of the potential species tree roots", True)
    resultsSpeciesTrees = []
    for i, (outgroup, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
        root_suffix = " (root %d)" % i if qMultiple else ""
        util.PrintUnderline("Reconciling gene trees and species tree" + root_suffix)
        if qMultiple:
            resultsDir_new = resultsDir + "Orthologues_using_outgroup_%d/" % i
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees_using_outgroup_%d/" % i
            resultsSpeciesTrees.append(resultsDir_new + "SpeciesTree_rooted_at_outgroup_%d.txt" % i)
        else:
            resultsDir_new = resultsDir + "Orthologues/"
            reconTreesRenamedDir = db.workingDir + "Recon_Gene_Trees/"
            resultsSpeciesTrees.append(resultsDir + "SpeciesTree_rooted.txt")
        # A user-supplied tree has no inferred outgroup to report
        if qMultiple or not userSpeciesTree:
            print("Outgroup: " + ", ".join([spDict[s] for s in outgroup]))
        os.mkdir(resultsDir_new)
        util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qFixNegatives=True)
        ReconciliationAndOrthologues(db.TreeFilename_IDs, db.ogSet, speciesTree_fn, db.workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, i if qMultiple else None, pickleDir=pickleDir)

    # === 5 ===  Clean up
    db.DeleteBlastMatrices()
    CleanWorkingDir(db.workingDir)
    util.PrintUnderline("Writing results files", True)

    return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None)
Пример #18
0
def OrthologuesWorkflow(speciesToUse, nSpAll, 
                       tree_options,
                       msa_method,
                       tree_method,
                       recon_method,
                       nHighParallel,
                       nLowParrallel,
                       qDoubleBlast,
                       qAddSpeciesToIDs,
                       userSpeciesTree = None, 
                       qStopAfterSeqs = False,
                       qStopAfterAlign = False,
                       qStopAfterTrees = False, 
                       qMSA = False,
                       qPhyldog = False,
                       results_name = ""):
    """
    Run the gene tree / species tree / orthologue inference pipeline.

    Stages:
      1. Gene trees, from multiple sequence alignments (qMSA or qPhyldog) or
         from DendroBLAST; the unrooted species tree (ids format) is the
         user's tree, a hard-coded tree for fewer than four species, or an
         inferred one.
      2. Obtain the rooted species tree(s): from Phyldog, from the user tree,
         hard-coded for two species, or via stride.GetRoot.
      3. Reconcile the gene trees with the first rooted species tree and
         infer orthologues.
      4. Clean up the working directory.

    Returns:
        A string describing where the results were written. The function
        returns early (with a shorter summary) when qStopAfterSeqs,
        qStopAfterAlign or qStopAfterTrees applies.
    """
    ogSet = OrthoGroupsSet(files.FileHandler.GetWorkingDirectory1_Read(), speciesToUse, nSpAll, qAddSpeciesToIDs, idExtractor = util.FirstWordExtractor)

    if qMSA or qPhyldog:
        tree_generation_method = "msa"
    else:
        tree_generation_method = "dendroblast"
    if qStopAfterSeqs:
        stop_after = "seqs"
    elif qStopAfterAlign:
        stop_after = "align"
    else:
        stop_after = ""
    files.FileHandler.MakeResultsDirectory2(tree_generation_method, stop_after, results_name)

    # === 1 ===  (ust = user species tree)
    # MSA:               Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Phyldog:           Sequences    Alignments                        GeneTrees    db    SpeciesTree
    # Dendroblast:                                  DistanceMatrices    GeneTrees    db    SpeciesTree
    # MSA (ust):         Sequences    Alignments                        GeneTrees    db
    # Phyldog (ust):     Sequences    Alignments                        GeneTrees    db
    # Dendroblast (ust):                            DistanceMatrices    GeneTrees    db
    # The DendroBLAST species tree is switched off on the MSA path
    qDB_SpeciesTree = False
    if userSpeciesTree:
        util.PrintUnderline("Using user-supplied species tree")
        spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
        ConvertUserSpeciesTree(userSpeciesTree, ogSet.SpeciesDict(), spTreeFN_ids)

    if qMSA or qPhyldog:
        qLessThanFourSpecies = len(ogSet.seqsInfo.speciesToUse) < 4
        tree_builder = trees_msa.TreesForOrthogroups(tree_options, msa_method, tree_method)
        if (not userSpeciesTree) and qLessThanFourSpecies:
            # Too few species to infer a tree: write the fixed topology instead
            spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
            WriteSpeciesTreeIDs_TwoThree(ogSet.seqsInfo.speciesToUse, spTreeFN_ids)
            util.RenameTreeTaxa(spTreeFN_ids, files.FileHandler.GetSpeciesTreeUnrootedFN(True), ogSet.SpeciesDict(), qSupport=False, qFixNegatives=True)
        qDoMSASpeciesTree = not (qLessThanFourSpecies or userSpeciesTree)
        util.PrintTime("Starting MSA/Trees")
        seqs_alignments_dirs = tree_builder.DoTrees(ogSet.OGs(qInclAll=True), ogSet.OrthogroupMatrix(), ogSet.Spec_SeqDict(), ogSet.SpeciesDict(), ogSet.speciesToUse, nHighParallel, qStopAfterSeqs, qStopAfterAlign or qPhyldog, qDoSpeciesTree=qDoMSASpeciesTree)
        util.PrintTime("Done MSA/Trees")
        if qDoMSASpeciesTree:
            spTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
        if qStopAfterSeqs:
            print("")
            return ("\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0])
        if qStopAfterAlign:
            print("")
            summary = "\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0]
            summary += "\nMultiple sequence alignments:\n   %s\n" % seqs_alignments_dirs[1]
            return summary
        db = DendroBLASTTrees(ogSet, nLowParrallel, qDoubleBlast)
        if qDB_SpeciesTree and not userSpeciesTree and not qLessThanFourSpecies:
            util.PrintUnderline("Inferring species tree (calculating gene distances)")
            print("Loading BLAST scores")
            spTreeFN_ids = db.SpeciesTreeOnly()
        if qPhyldog:
            if userSpeciesTree:
                userSpeciesTree = ConvertUserSpeciesTree(userSpeciesTree, ogSet.SpeciesDict(), files.FileHandler.GetSpeciesTreeUnrootedFN())
            util.PrintTime("Starting phyldog")
            species_tree_ids_labelled_phyldog = wrapper_phyldog.RunPhyldogAnalysis(files.FileHandler.GetPhyldogWorkingDirectory(), ogSet.OGs(), speciesToUse, nHighParallel)
    else:
        db = DendroBLASTTrees(ogSet, nLowParrallel, qDoubleBlast)
        spTreeFN_ids, qSTAG = db.RunAnalysis()
    files.FileHandler.LogWorkingDirectoryTrees()
    # qSTAG is only consulted on the DendroBLAST path, the one that sets it
    if userSpeciesTree or qMSA or qPhyldog:
        qSpeciesTreeSupports = False
    else:
        qSpeciesTreeSupports = qSTAG

    # SpeciesTree:
    # spTreeFN_ids, or equivalently FileHandler.GetSpeciesTreeUnrootedFN() in
    # all cases (user, inferred etc). Thus, we always have the species tree in
    # ids format.
    # With phyldog, we also have species_tree_ids_labelled_phyldog - with the
    # node labels given by phyldog.

    # === 2 ===  The "at least four species" check is disabled.

    # === 3 ===
    # MSA / Phyldog / Dendroblast:                   RootSpeciesTree
    # MSA (ust) / Phyldog (ust) / Dendroblast (ust): ConvertSpeciesTreeIDs
    # Defaults for every case in which the root is not inferred
    roots = [None]
    qMultiple = False
    all_stride_dup_genes = None
    if qPhyldog:
        rootedSpeciesTreeFN = [species_tree_ids_labelled_phyldog]
    elif userSpeciesTree:
        rootedSpeciesTreeFN = [spTreeFN_ids]
    elif len(ogSet.seqsInfo.speciesToUse) == 2:
        rootedSpeciesTreeFN = [GetSpeciesTreeRoot_TwoTaxa(ogSet.seqsInfo.speciesToUse)]
    else:
        util.PrintUnderline("Best outgroup(s) for species tree")
        util.PrintTime("Starting STRIDE")
        roots, clusters_counter, rootedSpeciesTreeFN, nSupport, _, _, all_stride_dup_genes = stride.GetRoot(spTreeFN_ids, files.FileHandler.GetOGsTreeDir(), stride.GeneToSpecies_dash, nHighParallel, qWriteRootedTree=True)
        util.PrintTime("Done STRIDE")
        n_all = sum(clusters_counter.values())
        n_contradict = n_all - nSupport
        n_non_trivial = sum(v for k, v in clusters_counter.items() if len(k) > 1)
        dup_counts = (n_non_trivial, n_non_trivial-n_contradict, n_contradict)
        if len(roots) > 1:
            print("Observed %d well-supported, non-terminal duplications. %d support the best roots and %d contradict them." % dup_counts)
            print("Best outgroups for species tree:")
        else:
            print("Observed %d well-supported, non-terminal duplications. %d support the best root and %d contradict it." % dup_counts)
            print("Best outgroup for species tree:")
        spDict = ogSet.SpeciesDict()
        for outgroup in roots:
            print("  " + ", ".join([spDict[s] for s in outgroup]))
        qMultiple = len(roots) > 1
    shutil.copy(rootedSpeciesTreeFN[0], files.FileHandler.GetSpeciesTreeIDsRootedFN())

    # SpeciesTree:
    # We now have a list of rooted species trees: rootedSpeciesTreeFN (this
    # should be recorded by the file handler)

    if qStopAfterTrees:
        if userSpeciesTree:
            summary = ""
            if qMSA:
                summary += "\nSequences for orthogroups:\n   %s\n" % seqs_alignments_dirs[0]
                summary += "\nMultiple sequence alignments:\n   %s\n" % seqs_alignments_dirs[1]
            summary += "\nGene trees:\n   %s\n" % (files.FileHandler.GetResultsTreesDir())
            return summary
        # otherwise, write out the rooted species tree(s)
        resultsSpeciesTrees = []
        for i, (_, speciesTree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            resultsSpeciesTrees.append(files.FileHandler.GetSpeciesTreeResultsFN(i, not qMultiple))
            util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qSupport=qSpeciesTreeSupports, qFixNegatives=True)
            labeled_tree_fn = files.FileHandler.GetSpeciesTreeResultsNodeLabelsFN()
            util.RenameTreeTaxa(speciesTree_fn, labeled_tree_fn, db.ogSet.SpeciesDict(), qSupport=False, qFixNegatives=True, label='N')
        files.FileHandler.CleanWorkingDir2()
        return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None, False)

    # === 4 ===  Reconciliation / orthologues using the first rooted tree only
    if qMultiple:
        util.PrintUnderline("\nMultiple potential species tree roots were identified, only one will be analyed.", True)
    resultsSpeciesTrees = []
    first_root = roots[0]
    speciesTree_fn = rootedSpeciesTreeFN[0]
    util.PrintUnderline("Reconciling gene trees and species tree")
    resultsSpeciesTrees.append(files.FileHandler.GetSpeciesTreeResultsFN(0, True))
    # An outgroup is only known when the root was inferred by STRIDE above
    if not (userSpeciesTree or qPhyldog or len(ogSet.seqsInfo.speciesToUse) == 2):
        print("Outgroup: " + ", ".join([spDict[s] for s in first_root]))
    util.RenameTreeTaxa(speciesTree_fn, resultsSpeciesTrees[-1], db.ogSet.SpeciesDict(), qSupport=qSpeciesTreeSupports, qFixNegatives=True)
    util.PrintTime("Starting Recon and orthologues")
    ReconciliationAndOrthologues(recon_method, db.ogSet, nHighParallel, 0 if qMultiple else None, all_stride_dup_genes=all_stride_dup_genes)
    util.PrintTime("Done Recon")

    if qMultiple:
        # Also write out every candidate rooting, including those not analysed
        for i, (_, candidate_tree_fn) in enumerate(zip(roots, rootedSpeciesTreeFN)):
            unanalysedSpeciesTree = files.FileHandler.GetSpeciesTreeResultsFN(i, False)
            util.RenameTreeTaxa(candidate_tree_fn, unanalysedSpeciesTree, db.ogSet.SpeciesDict(), qSupport=qSpeciesTreeSupports, qFixNegatives=True, label='N')

    # SpeciesTree: If it's been inferred, there is now at least one rooted
    # results species tree: GetSpeciesTreeResultsFN()

    files.FileHandler.CleanWorkingDir2()
    util.PrintUnderline("Writing results files", True)

    return GetResultsFilesString(resultsSpeciesTrees, seqs_alignments_dirs if qMSA else None)
Пример #19
0
    def DoTrees(self, ogs, ogMatrix, idDict, speciesIdDict, speciesToUse,
                qOutputCommands, nProcesses, qStopAfterSeqs,
                qStopAfterAlignments, qDoSpeciesTree):
        idDict.update(
            speciesIdDict
        )  # smae code will then also convert concatenated alignment for species tree
        # 0
        resultsDirsFullPath = [
            files.FileHandler.GetResultsSeqsDir(),
            files.FileHandler.GetResultsAlignDir(),
            files.FileHandler.GetResultsTreesDir()
        ]

        # 1.
        fastaWriter = FastaWriter(files.FileHandler.GetSpeciesSeqsDir(),
                                  speciesToUse)
        self.WriteFastaFiles(fastaWriter, ogs, idDict, True)
        if qStopAfterSeqs: return resultsDirsFullPath

        job_files = []

        # 3
        # Get OGs to use for species tree
        if qDoSpeciesTree:
            iOgsForSpeciesTree, fSingleCopy = DetermineOrthogroupsForSpeciesTree(
                ogMatrix)
            concatenated_algn_fn = files.FileHandler.GetSpeciesTreeConcatAlignFN(
            )
        else:
            iOgsForSpeciesTree = []
        alignCommands_and_filenames = self.GetAlignmentCommandsAndNewFilenames(
            ogs)
        if qStopAfterAlignments:
            util.PrintUnderline("Inferring multiple sequence alignments")
            if qOutputCommands:
                job_files.append(
                    CreateMsaJob(alignCommands_and_filenames, len(job_files)))
            else:
                pc.RunParallelCommandsAndMoveResultsFile(
                    nProcesses, alignCommands_and_filenames, False)
            if qDoSpeciesTree:
                if qOutputCommands:
                    job_files.append(
                        CreateConcatenatedAlignmentJob(iOgsForSpeciesTree,
                                                       concatenated_algn_fn,
                                                       fSingleCopy,
                                                       len(job_files)))
                else:
                    CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs,
                                                self.GetAlignmentFilename,
                                                concatenated_algn_fn,
                                                fSingleCopy)

            # ids -> accessions
            alignmentFilesToUse = [
                self.GetAlignmentFilename(i)
                for i, _ in enumerate(alignCommands_and_filenames)
            ]
            accessionAlignmentFNs = [
                self.GetAlignmentFilename(i, True)
                for i in xrange(len(alignmentFilesToUse))
            ]
            if qDoSpeciesTree:
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(
                    files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
            if qOutputCommands:
                # TODO: make rename alignment taxa command
                util.PrintUnderline("Execute the commands in " +
                                    ','.join(job_files))
            else:
                self.RenameAlignmentTaxa(alignmentFilesToUse,
                                         accessionAlignmentFNs, idDict)
            return resultsDirsFullPath[:2]

        # Otherwise, alignments and trees
        # Strategy is
        # 1. Do alignments (and trees) required for species tree
        # 2. Create concatenated alignment
        # 3. Create second list of commands [speciestree] + [remaining alignments and trees]
        alignmentFilesToUse = [
            self.GetAlignmentFilename(i)
            for i, _ in enumerate(alignCommands_and_filenames)
        ]
        treeCommands_and_filenames = self.GetTreeCommands(
            alignmentFilesToUse, ogs)
        commands_and_filenames = []
        if qDoSpeciesTree:
            print(
                "Species tree: Using %d orthogroups with minimum of %0.1f%% of species having single-copy genes in any orthogroup"
                % (len(iOgsForSpeciesTree), 100. * fSingleCopy))
            util.PrintUnderline(
                "Inferring multiple sequence alignments for species tree")
            # Do required alignments and trees
            speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeUnrootedFN()
            for i in iOgsForSpeciesTree:
                commands_and_filenames.append([
                    alignCommands_and_filenames[i],
                    treeCommands_and_filenames[i]
                ])
            if qOutputCommands:
                job_files.append(
                    CreateMsaJob(commands_and_filenames, len(job_files)))
                job_files.append(
                    CreateConcatenatedAlignmentJob(iOgsForSpeciesTree,
                                                   concatenated_algn_fn,
                                                   fSingleCopy,
                                                   len(job_files)))
            else:
                pc.RunParallelCommandsAndMoveResultsFile(
                    nProcesses, commands_and_filenames, True)
                CreateConcatenatedAlignment(iOgsForSpeciesTree, ogs,
                                            self.GetAlignmentFilename,
                                            concatenated_algn_fn, fSingleCopy)
                # Add species tree to list of commands to run
            commands_and_filenames = [
                self.program_caller.GetTreeCommands(self.tree_program,
                                                    [concatenated_algn_fn],
                                                    [speciesTreeFN_ids],
                                                    ["SpeciesTree"])
            ]
            if qOutputCommands:
                job_files.append(
                    CreateSpeciesTreeJob(commands_and_filenames,
                                         len(job_files)))
                commands_and_filenames = []
            util.PrintUnderline(
                "Inferring remaining multiple sequence alignments and gene trees"
            )
        else:
            util.PrintUnderline(
                "Inferring multiple sequence alignments and gene trees")

        # Now continue as before
        iOgsForSpeciesTree = set(iOgsForSpeciesTree)
        for i in xrange(len(treeCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([
                alignCommands_and_filenames[i], treeCommands_and_filenames[i]
            ])
        for i in xrange(len(treeCommands_and_filenames),
                        len(alignCommands_and_filenames)):
            if i in iOgsForSpeciesTree: continue
            commands_and_filenames.append([alignCommands_and_filenames[i]])
        if qOutputCommands:
            job_files.append(
                CreateOGTreesJob(commands_and_filenames, len(job_files)))
        else:
            pc.RunParallelCommandsAndMoveResultsFile(nProcesses,
                                                     commands_and_filenames,
                                                     True)

        # Convert ids to accessions
        accessionAlignmentFNs = [
            self.GetAlignmentFilename(i, True)
            for i in xrange(len(alignmentFilesToUse))
        ]
        # Add concatenated Alignment
        if qDoSpeciesTree:
            if qOutputCommands:
                job_files.append(
                    CreateRenameTaxaJob([
                        (concatenated_algn_fn,
                         files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
                    ], [(speciesTreeFN_ids,
                         files.FileHandler.GetSpeciesTreeUnrootedFN(True))],
                                        len(job_files)))
            else:
                qHaveSupport = util.HaveSupportValues(speciesTreeFN_ids)
                alignmentFilesToUse.append(concatenated_algn_fn)
                accessionAlignmentFNs.append(
                    files.FileHandler.GetSpeciesTreeConcatAlignFN(True))
                if os.path.exists(speciesTreeFN_ids):
                    util.RenameTreeTaxa(
                        speciesTreeFN_ids,
                        files.FileHandler.GetSpeciesTreeUnrootedFN(True),
                        idDict,
                        qSupport=qHaveSupport,
                        qFixNegatives=True)
                else:
                    text = "ERROR: Species tree inference failed"
                    files.FileHandler.LogFailAndExit(text)

        if qOutputCommands:
            job_files.append(
                CreateRenameTaxaJob(
                    zip(alignmentFilesToUse, accessionAlignmentFNs),
                    [(self.GetTreeFilename(i), self.GetTreeFilename(i, True))
                     for i in xrange(len(treeCommands_and_filenames))],
                    len(job_files)))
            if qOutputCommands:
                print(
                    "Run the commands contained in these files (each depends on the previous):\n"
                    + "\n".join(job_files))
                files.FileHandler.LogWorkingDirectoryTrees()
        else:
            self.RenameAlignmentTaxa(alignmentFilesToUse,
                                     accessionAlignmentFNs, idDict)
            qHaveSupport = None
            for i in xrange(len(treeCommands_and_filenames)):
                infn = self.GetTreeFilename(i)
                if os.path.exists(infn):
                    if qHaveSupport == None:
                        qHaveSupport = util.HaveSupportValues(infn)
                    util.RenameTreeTaxa(infn,
                                        self.GetTreeFilename(i, True),
                                        idDict,
                                        qSupport=qHaveSupport,
                                        qFixNegatives=True)

        return resultsDirsFullPath[:2]