Пример #1
0
def GetSpeciesGenesInfo():
    """
    Return the species labels in use together with a list of sequence counts.

    The species IDs file and the sequences directory are both obtained from
    files.FileHandler. The counts are the differences between consecutive
    entries of seqsInfo.seqStartingIndices, followed by the number of
    sequences from the last starting index up to seqsInfo.nSeqs
    (presumably one count per species - confirm against util.GetSeqsInfo).

    Returns: (speciesLabels, genenumbers)
    """
    speciesIDsFN = files.FileHandler.GetSpeciesIDsFN()
    speciesLabels, nSpAll, _ = util.GetSpeciesToUse(speciesIDsFN)

    seqsDir = files.FileHandler.GetSpeciesSeqsDir()
    info = util.GetSeqsInfo(seqsDir, speciesLabels, nSpAll)

    starts = info.seqStartingIndices
    # np.diff covers every block except the last one, which runs to nSeqs
    genenumbers = list(np.diff(starts)) + [info.nSeqs - starts[-1]]
    return speciesLabels, genenumbers
Пример #2
0
def OrthologuesFromTrees(groupsDir, workingDir, nHighParallel, speciesTree_fn = None, pickleDir=None):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    groupsDir - directory with orthogroups file in
    workingDir - orthologues 'WorkingDirectory'. Relative paths are appended to it by plain
                 string concatenation, so it is presumably expected to end with a path separator
    nHighParallel - passed through unchanged to ReconciliationAndOrthologues
    speciesTree_fn - None if not supplied, otherwise rooted tree using user species names (not orthofinder IDs).
                     When None, the rooted species tree is looked up in workingDir + "Trees_ids/"
    pickleDir - passed through unchanged to ReconciliationAndOrthologues

    Returns a message string giving the results directory that was created.
    util.Fail() is called if the species tree cannot be determined unambiguously.
    """
    # Check species tree
    # qUserSpTree - is the speciesTree_fn user-supplied
    qUserSpTree = speciesTree_fn is not None
    if qUserSpTree:
        if not os.path.exists(speciesTree_fn):
            print("\nERROR: %s does not exist\n" % speciesTree_fn)
            util.Fail()
    else:
        possibilities = ["SpeciesTree_ids_0_rooted.txt", "SpeciesTree_ids_1_rooted.txt", "SpeciesTree_user_ids.txt"] # etc (only need to determine if unique)
        nTrees = 0
        for p in possibilities:
            fn = workingDir + "Trees_ids/" + p
            if os.path.exists(fn):
                nTrees += 1
                speciesTree_fn = fn
        if nTrees == 0:
            print("\nERROR: There is a problem with the specified directory. The rooted species tree %s or %s is not present." % (possibilities[0], possibilities[2]))
            print("Please rectify the problem or alternatively use the -s option to specify the species tree to use.\n")
            util.Fail()
        if nTrees > 1:
            print("\nERROR: There is more than one rooted species tree in the specified directory structure. Please use the -s option to specify which species tree should be used\n")
            util.Fail()

    def TreePatIDs(iog):
        # File name of the ID-labelled gene tree for orthogroup number iog
        return workingDir + ("Trees_ids/OG%07d_tree_id.txt" % iog)
    reconTreesRenamedDir = workingDir + "Recon_Gene_Trees/"
    resultsDir_new = workingDir + "../Orthologues"      # for the Orthologues_Species/ directories
    resultsDir_new = util.CreateNewWorkingDirectory(resultsDir_new + "_")
    orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = util.GetOGsFile(groupsDir)
    speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir + "SpeciesIDs.txt")
    ogSet = OrthoGroupsSet(orthofinderWorkingDir, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor = util.FirstWordExtractor)
    if qUserSpTree:
        # A user tree is validated against the species names and then converted;
        # the converted file returned by ConvertUserSpeciesTree is used from here on
        speciesToUseNames = ogSet.SpeciesDict().values()
        CheckUserSpeciesTree(speciesTree_fn, speciesToUseNames)
        speciesTree_fn = ConvertUserSpeciesTree(workingDir + "Trees_ids/", speciesTree_fn, ogSet.SpeciesDict())
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees")
    ReconciliationAndOrthologues(TreePatIDs, ogSet, speciesTree_fn, workingDir, resultsDir_new, reconTreesRenamedDir, nHighParallel, pickleDir=pickleDir)
    util.PrintUnderline("Writing results files")
    CleanWorkingDir(workingDir)
    return "Species-by-species orthologues directory:\n   %s\n" % resultsDir_new
Пример #3
0
def OrthologuesFromTrees(recon_method, nHighParallel, userSpeciesTree_fn, qAddSpeciesToIDs):
    """
    Just infer orthologues from trees, don't do any of the preceding steps.

    recon_method - passed through unchanged to ReconciliationAndOrthologues
    nHighParallel - passed through unchanged to ReconciliationAndOrthologues
    userSpeciesTree_fn - None if not supplied otherwise rooted tree using user species names (not orthofinder IDs)
    qAddSpeciesToIDs - passed through unchanged to OrthoGroupsSet

    Returns a message string giving the orthologues directory reported by files.FileHandler.
    """
    speciesToUse, nSpAll, _ = util.GetSpeciesToUse(files.FileHandler.GetSpeciesIDsFN())
    ogSet = OrthoGroupsSet(files.FileHandler.GetWorkingDirectory1_Read(), speciesToUse, nSpAll, qAddSpeciesToIDs, idExtractor = util.FirstWordExtractor)
    if userSpeciesTree_fn is not None:
        # A user-supplied tree is checked against the names of the species in use,
        # then converted and written to the rooted species-tree IDs file
        speciesDict = files.FileHandler.GetSpeciesDict()
        speciesToUseNames = [speciesDict[str(iSp)] for iSp in ogSet.speciesToUse]
        CheckUserSpeciesTree(userSpeciesTree_fn, speciesToUseNames)
        speciesTreeFN_ids = files.FileHandler.GetSpeciesTreeIDsRootedFN()
        ConvertUserSpeciesTree(userSpeciesTree_fn, speciesDict, speciesTreeFN_ids)
    util.PrintUnderline("Running Orthologue Prediction", True)
    util.PrintUnderline("Reconciling gene and species trees")
    ReconciliationAndOrthologues(recon_method, ogSet, nHighParallel)
    util.PrintUnderline("Writing results files")
    util.PrintTime("Writing results files")
    files.FileHandler.CleanWorkingDir2()
    return "Species-by-species orthologues directory:\n   %s\n" % files.FileHandler.GetOrthologuesDirectory()
Пример #4
0
def GetSpeciesGenesInfo(ogSet):
    """
    Return the species labels in use together with a list of sequence counts.

    ogSet - object providing speciesIDsFN (species IDs file) and workingDirOF
            (directory handed to util.GetSeqsInfo)

    The counts are the differences between consecutive entries of
    seqsInfo.seqStartingIndices, followed by the number of sequences from the
    last starting index up to seqsInfo.nSeqs (presumably one count per
    species - confirm against util.GetSeqsInfo).

    Returns: (speciesLabels, genenumbers)
    """
    speciesLabels, nSpAll = util.GetSpeciesToUse(ogSet.speciesIDsFN)
    info = util.GetSeqsInfo(ogSet.workingDirOF, speciesLabels, nSpAll)

    starts = info.seqStartingIndices
    # np.diff covers every block except the last one, which runs to nSeqs
    genenumbers = list(np.diff(starts)) + [info.nSeqs - starts[-1]]
    return speciesLabels, genenumbers
Пример #5
0
                print("Missing option for command line argument -t")
                util.Fail()
            arg = args.pop(0)
            try:
                nProcesses = int(arg)
            except:
                print("Incorrect argument for number of threads: %s" % arg)
                util.Fail()
        else:
            userDir = arg

    # Check arguments
    print("0. Getting Orthologues")
    print("----------------------")
    if nProcesses == None:
        print(
            """\nNumber of parallel processes has not been specified, will use the default value.  
Number of parallel processes can be specified using the -t option.""")
        nProcesses = util.nThreadsDefault
    print("Using %d threads for alignments and trees" % nProcesses)

    orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = util.GetOGsFile(
        userDir)
    speciesToUse, nSpAll = util.GetSpeciesToUse(orthofinderWorkingDir +
                                                "SpeciesIDs.txt")
    resultsString = GetOrthologues(orthofinderWorkingDir,
                                   orthofinderResultsDir, speciesToUse, nSpAll,
                                   clustersFilename_pairs, nProcesses)
    print(resultsString)
    util.PrintCitation()