def __init__(self, orthofinderWorkingDir_list, speciesToUse, nSpAll, qAddSpeciesToIDs, idExtractor=util.FirstWordExtractor):
    """Initialise the instance from the file handler's species IDs file.

    Args:
        orthofinderWorkingDir_list: Forwarded unchanged to
            ``util.GetSeqsInfo`` (presumably a list of working
            directories -- confirm against that helper).
        speciesToUse: List of ints; stored and forwarded to
            ``util.GetSeqsInfo``.
        nSpAll: Forwarded unchanged to ``util.GetSeqsInfo``.
        qAddSpeciesToIDs: Stored as-is on the instance.
        idExtractor: Stored as ``self._extractor``; defaults to
            ``util.FirstWordExtractor``.
    """
    # Values copied straight from the arguments.
    self._extractor = idExtractor
    self.qAddSpeciesToIDs = qAddSpeciesToIDs
    self.speciesToUse = speciesToUse  # list of ints

    # Members that start out empty.
    self._Spec_SeqIDs = None
    self.seqIDsEx = None
    self.ogs_all = None
    self.id_to_og = None
    self.iOgs4 = 0

    # Species IDs come from the path reported by the file handler.
    species_ids_fn = files.FileHandler.GetSpeciesIDsFN()
    self.speciesIDsEx = util.FullAccession(species_ids_fn)

    self.seqsInfo = util.GetSeqsInfo(orthofinderWorkingDir_list, self.speciesToUse, nSpAll)
def SequenceDict(self):
    """Return the ID-to-name dictionary of the sequence ID extractor.

    The extractor is built on the first call by invoking
    ``self._extractor`` on ``self.seqIDsFN`` and is cached in
    ``self.seqIDsEx`` for later calls.

    If building the extractor raises ``RuntimeError``, the error text is
    printed. When that text starts with ``"ERROR"``, ``util.Fail()`` is
    called; otherwise a notice is printed and ``util.FullAccession`` is
    used on the same file instead.

    Returns:
        Whatever ``self.seqIDsEx.GetIDToNameDict()`` returns.
    """
    if self.seqIDsEx is None:
        try:
            self.seqIDsEx = self._extractor(self.seqIDsFN)
        except RuntimeError as error:
            # Exceptions have no ``.message`` attribute on Python 3, so
            # take the text via str(), which works on Python 2 and 3.
            message = str(error)
            print(message)
            if message.startswith("ERROR"):
                util.Fail()
            else:
                print("Tried to use only the first part of the accession in order to list the sequences in each orthogroup\nmore concisely but these were not unique. The full accession line will be used instead.\n")
                self.seqIDsEx = util.FullAccession(self.seqIDsFN)
    return self.seqIDsEx.GetIDToNameDict()
def __init__(self, orthofinderWorkingDir, speciesToUse, nSpAll, clustersFilename_pairs, idExtractor=util.FirstWordExtractor, pickleDir=None):
    """Initialise the instance from a single working directory.

    Args:
        orthofinderWorkingDir: Directory prefix. File names are appended
            to it by plain string concatenation, so it is expected to
            already end with a path separator.
        speciesToUse: Stored and forwarded to ``util.GetSeqsInfo``.
        nSpAll: Forwarded unchanged to ``util.GetSeqsInfo``.
        clustersFilename_pairs: Stored as ``self.clustersFN``.
        idExtractor: Stored as ``self._extractor``; defaults to
            ``util.FirstWordExtractor``.
        pickleDir: Forwarded to ``util.FileInfo`` as
            ``separatePickleDir``.
    """
    working_dir = orthofinderWorkingDir

    # Values copied straight from the arguments.
    self.workingDirOF = working_dir
    self.clustersFN = clustersFilename_pairs
    self.speciesToUse = speciesToUse
    self._extractor = idExtractor

    # Members that start out empty.
    self._Spec_SeqIDs = None
    self.seqIDsEx = None
    self.ogs_all = None
    self.id_to_og = None
    self.iOgs4 = 0

    # ID files are addressed relative to the working directory.
    self.seqIDsFN = working_dir + "SequenceIDs.txt"
    self.speciesIDsFN = working_dir + "SpeciesIDs.txt"
    self.speciesIDsEx = util.FullAccession(self.speciesIDsFN)

    self.seqsInfo = util.GetSeqsInfo(working_dir, self.speciesToUse, nSpAll)
    self.fileInfo = util.FileInfo(workingDir=working_dir, graphFilename="", separatePickleDir=pickleDir)
def GetSpeciesDict(self):
    """Return the species ID-to-name mapping with trailing suffixes removed.

    The mapping is read via ``util.FullAccession`` from the file named
    by ``self.GetSpeciesIDsFN()``. Each value is cut at its last ``"."``
    (presumably to drop a file extension -- confirm); values without a
    ``"."`` are kept whole.
    """
    accession = util.FullAccession(self.GetSpeciesIDsFN())
    id_to_name = accession.GetIDToNameDict()

    trimmed = {}
    for species_id, name in id_to_name.items():
        # maxsplit=1 from the right removes only the text after the last ".".
        trimmed[species_id] = name.rsplit(".", 1)[0]
    return trimmed