def __init__(self, stepName, isHomoComplex, savedModelsPath=None, averageLRscores=False):
    '''
    Locate and load the saved model whose file name ends with stepName.

    Every file in the selected models directory whose name ends with stepName
    is loaded in listing order, so the last match is the one kept in self.model.

    :param stepName: str. Must startswith seq_train or struct or mixed
                     (seq_train, mixed_2, structX, seq_train1... are also valid)
    :param isHomoComplex: boolean. True if the target complex is a homo-complex,
                          False if it is a hetero-complex
    :param savedModelsPath: str. A path to the directory where models have been saved.
                            If None, the path indicated in Config is used
    :param averageLRscores: True if ligand and receptor are the same protein and thus
                            binding site predictions should be averaged
    '''
    Configuration.__init__(self)
    self.stepName = stepName
    self.isHomoComplex = isHomoComplex
    self.averageLRscores = averageLRscores
    if savedModelsPath is not None:
        self.savedModelsPath = savedModelsPath
    self.model = None
    print(stepName)

    # NOTE(review): the literal "h**o" looks like a mangled "homo" (the flag is
    # named isHomoComplex) -- confirm against the on-disk directory name.
    if self.isHomoComplex:
        complexKind = "h**o"
    else:
        complexKind = "hetero"
    # Models are looked up in a per-complex-kind subdirectory.
    self.savedModelsPath = os.path.join(self.savedModelsPath, complexKind)

    candidates = [name for name in os.listdir(self.savedModelsPath)
                  if name.endswith(stepName)]
    for modelFname in candidates:
        print("Loading model %s %s" % (complexKind, modelFname))
        self.model = joblib_load(os.path.join(self.savedModelsPath, modelFname))

    assert self.model is not None, "Error, there is no valid model in %s for step %s" % (
        self.savedModelsPath, self.stepName)
def __init__(self, rFname, lFname, computedFeatsRootDir=None, statusManager=None):
    '''
    Store the receptor/ligand pdb paths and make sure the features root directory exists.

    @param rFname: str. path to receptor pdb file
    @param lFname: str. path to ligand pdb file
    @param computedFeatsRootDir: str. path where features will be stored. If None, the
                                 value already on the instance is kept (presumably set by
                                 Configuration.__init__ -- confirm)
    @param statusManager: class that implements .setStatus(msg) to communicate
    '''
    Configuration.__init__(self)  # Load configuration parameters such as path to programs
    self.statusManager = statusManager
    # Identity test: None is a singleton, so "is not None" is the correct check.
    if computedFeatsRootDir is not None:
        self.computedFeatsRootDir = computedFeatsRootDir
    # Expand a leading "~" whichever source the path came from.
    self.computedFeatsRootDir = os.path.expanduser(self.computedFeatsRootDir)
    # Creates root path where features will be saved
    myMakeDir(self.computedFeatsRootDir)
    self.rFname = rFname
    self.lFname = lFname
    # Fallback kept from the original in case myMakeDir did not create the directory.
    if not os.path.isdir(self.computedFeatsRootDir):
        os.mkdir(self.computedFeatsRootDir)
def __init__(self, dataRootPath, singleChainfeatsToInclude, pairfeatsToInclude=None, verbose=False):
    '''
    Record where computed features live and which of them must be loaded.

    @param dataRootPath: str. A path to computedFeatures directory that contains needed features. Example:
              computedFeatures/
                common/
                  contactMaps/
                seqStep/
                  conservation/
                  ...
                structStep/
                  PSAIA/
                  VORONOI/
                  ...
    @param singleChainfeatsToInclude: dict. Dictionary that contains the paths where features needed for
              complex codification that describe single chain properties are located.
              Must have the following format:
              {"featName":(relativePath_from_dataRootPath, listOfColumnNumbers, dictForNamedColums)}
              dictForNamedColums= {"myFeatName":colNumber}
              It is stored by reference (not copied).
    @param pairfeatsToInclude: Same layout as singleChainfeatsToInclude, for the features listed under
              this name (presumably pairwise features -- confirm). A shallow copy is stored, so later
              changes to the caller's container do not affect this object. None is stored as None.
    @param verbose: bool.
    '''
    # Function-scope import so the block does not depend on the file's import header.
    import copy

    Configuration.__init__(self)
    self.dataRootPath = dataRootPath
    self.verbose = verbose
    self.singleChainfeatsToInclude = singleChainfeatsToInclude
    # The original used pairfeatsToInclude[:], which fails for the dict the docstring
    # asks for. copy.copy gives the same shallow copy for lists and also handles dicts.
    if pairfeatsToInclude is None:
        self.pairfeatsToInclude = None
    else:
        self.pairfeatsToInclude = copy.copy(pairfeatsToInclude)
def __init__(self, consDbSqlite=None, consDbFilesPath=None):
    '''
    Open the conservation sqlite database and record in self.isReady whether it is usable.

    :param consDbSqlite: path to the sqlite file. If falsy, the value already on the
                         instance is used (presumably set by Configuration -- confirm)
    :param consDbFilesPath: path whose basename must be uniref90 or uniref100. If falsy,
                            the value already on the instance is used
    '''
    Configuration.__init__(self)
    self.isReady = True
    if consDbFilesPath:
        self.consDbFilesPath = consDbFilesPath
    if consDbSqlite:
        self.consDbSqlite = consDbSqlite
    self.unirefType = None

    # Short-circuits exactly like the original: the files check only runs
    # when the sqlite file exists.
    if os.path.isfile(self.consDbSqlite) and self.checkIfDbFilesAvailable():
        self.unirefType = os.path.basename(self.consDbFilesPath)
        assert self.unirefType in ["uniref90", "uniref100"], (
            "Error, consDbFilesPath %s :must be path/to/data/[uniref90|uniref100]"
            % (self.consDbFilesPath))
        self.sqliteConn = sqlite3.connect(self.consDbSqlite)
        self.sqliteCursor = self.sqliteConn.cursor()
        # check if sqlite was correctly opened
        probeQuery = "SELECT seqId FROM sequencesTable where sequence== 0"
        try:
            self.sqliteCursor.execute(probeQuery).fetchone()
        except sqlite3.OperationalError:
            self.isReady = False
    else:
        self.isReady = False
def __init__(self, trainDataPath, testPath, outputPath=None, nFolds=None, isLastStep=False,
             saveModelFname=None, verbose=True, numProc=1):
    '''
    builder

    :param trainDataPath: str. Path to a dir where training data files are stored
    :param testPath: str. Path to a dir where testing data files are stored
    :param outputPath: str. Path to a dir where predictions will be stored. If None, results
                       will not be saved and just performance evaluation will be carried out
    :param nFolds: int. Number of folds for k-fold cross-validation. If -1, leave-one-out will
                   be performed. If 0, testing set will be used as if it were independent.
                   Cannot be 1
    :param isLastStep: bool. True if this train is the second step of a two steps workflow
                       or the first one in one step workflow
    :param saveModelFname: str. A path where the final model, trained with all data, will be
                           saved. If None, model won't be saved
    :param verbose: boolean. Whether or not print to stdout info
    :param numProc: int. Number of processes to use in parallel
    '''

    def collectPrefixes(dirPath):
        # Unique, sorted text before the first "." of every file name in dirPath.
        return sorted({fileName.split(".")[0] for fileName in os.listdir(dirPath)})

    Configuration.__init__(self)  # Load configuration parameters such as path to programs

    # Walk three levels up from trainDataPath; none of the pieces are used afterwards.
    upperDir, __ = os.path.split(trainDataPath)
    upperDir, stepName = os.path.split(upperDir)
    upperDir, __ = os.path.split(upperDir)

    self.outputPath = outputPath
    self.saveModelFname = saveModelFname
    self.verbose = verbose
    self.numProc = numProc
    self.nFolds = nFolds
    self.isLastStep = isLastStep

    self.trainPath = trainDataPath
    self.trainPrefixes = collectPrefixes(self.trainPath)

    self.testPath = testPath
    if self.testPath is None:
        self.testPrefixes = []
    else:
        self.testPrefixes = collectPrefixes(self.testPath)

    # self.loadTrainingData(sharedMemoryPath=None) will be executed later
    self.data = None
    self.prefixesUsedInModel = None

    if self.verbose:
        print("%d train complexes loaded." % (len(self.trainPrefixes)))

    if self.testPrefixes is None:
        self.numTestComplexes = 0
    else:
        self.numTestComplexes = len(self.testPrefixes)
def __init__(self, computedFeatsRootDir, statusManager=None):
    '''
    Keep the results root directory, the status reporter and the module-level filter settings.

    :param computedFeatsRootDir: str. root path where results will be saved
    :param statusManager: class that implements .setStatus(msg) to communicate
    '''
    Configuration.__init__(self)
    self.statusManager = statusManager
    self.computedFeatsRootDir = computedFeatsRootDir
    # Filtering settings come from the module-level constants.
    self.filterOutNoStandard = IGNORE_NO_STANDARD
    self.filterOutLabels = FILTER_OUT_LABELS
def __init__(self, data_path=None):
    '''
    Choose the AAIndex data location and load it into self.data via self.load().

    @param data_path: str: Path where AAIndex files are located. If None, self.AAindexPath is used
    '''
    Configuration.__init__(self)  # Load configuration parameters such as path to programs
    self.protein_proteinIndexes = ["KESO980101", "KESO980102", "MOOG990101"]
    # self.AAindexPath is only read when the caller gave no path.
    self.data_path = self.AAindexPath if data_path is None else data_path
    self.data = self.load()
def __init__(self, seqsManager, outPath, winSize):
    '''
    Keep the sequences manager, its output directory and the sliding-window settings.

    @param seqsManager: ..manageSeqs.seqsManager.SeqsManager
    @param outPath: str. root path where psiblast and al2co scores will be saved
    @param winSize: int. The size of sliding window
    '''
    Configuration.__init__(self)
    self.outPath = outPath
    self.winSize = winSize
    self.filterOutLabels = FILTER_OUT_LABELS
    self.seqsManager = seqsManager
    # Working directory is whatever the sequences manager reports.
    self.seqsWorkingDir = seqsManager.getSeqsOutDir()
def __init__(self, prefix, computedFeatsRootDir=None, statusManager=None):
    '''
    Store the complex id, the status reporter and, when given, the features root directory.

    :param prefix: An id for a complex. Example: 1A2K
    :param computedFeatsRootDir: str. path where features will be stored. If None, read
                                 from Configuration
    :param statusManager: class that implements .setStatus(msg) to communicate
    '''
    Configuration.__init__(self)  # Load configuration parameters such as path to programs
    self.prefix = prefix
    self.statusManager = statusManager
    # Override the configured root only when the caller supplied one. The original
    # followed this with a no-op self-assignment labelled "Creates root path"; nothing
    # was created there, so it has been dropped.
    if computedFeatsRootDir is not None:
        self.computedFeatsRootDir = computedFeatsRootDir
def __init__(self, dataRootPath, verbose=False):
    '''
    Remember the features root directory and the verbosity flag.

    :param dataRootPath: str. A path to computedFeatures directory that contains needed features.
              If None, self.computedFeatsRootDir is used. Example:
              computedFeatures/
                common/
                  contactMaps/
                seqStep/
                  conservation/
                  ...
                structStep/
                  PSAIA/
                  ...
    :param verbose: bool.
    '''
    Configuration.__init__(self)
    self.verbose = verbose
    # self.computedFeatsRootDir is only read when no explicit path was given.
    self.dataRootPath = self.computedFeatsRootDir if dataRootPath is None else dataRootPath
def __init__(self, stepName, savedModelsPath=None):
    '''
    Locate and load the saved model whose file name ends with stepName.

    Every file in the models directory whose name ends with stepName is loaded
    in listing order, so the last match is the one kept in self.model.

    @param stepName: str. Must startswith seq_train or struct or mixed
                     (seq_train, mixed_2, structX, seq_train1... are also valid)
    @param savedModelsPath: str. A path to the directory where models have been saved.
                            If None, the path indicated in Config is used
    '''
    Configuration.__init__(self)
    self.stepName = stepName
    if savedModelsPath is not None:
        self.savedModelsPath = savedModelsPath
    self.model = None
    print(stepName)

    modelsDir = self.savedModelsPath
    candidates = [name for name in os.listdir(modelsDir) if name.endswith(stepName)]
    for modelFname in candidates:
        print("Loading model %s" % (modelFname))
        self.model = joblib_load(os.path.join(modelsDir, modelFname))

    assert self.model is not None, "Error, there is no valid model in %s for step %s" % (
        self.savedModelsPath, self.stepName)
def __init__(self, features_path=None, out_Codified_path=None, feedback_paths=None,
             environType=None, ncpu=1, overridePrevComp=False, verbose=False):
    '''
    Validate the environment type, resolve input/output directories and fix the worker count.

    :param features_path: str. A path to the computedFeatures directory that contains needed features.
              If None, Config.py DEFAULT_PARAMETERS["computedFeatsRootDir"] will be used. Example:
              features_path/
                common/
                  contactMaps/
                seqStep/
                  conservation/
                  ...
                structStep/
                  PSAIA/
                  VORONOI/
                  ...
    :param out_Codified_path: str. Root directory where codified complexes will be saved. Files will be
              saved at directory out_Codified_path/seq[_n] if sequential environment protocol will be
              used or at out_Codified_path/struct[_n] if structural environment protocol will be used
              (attribute self.environType). If more than one step of the same type, the path will end
              with "_#", p.e:
                1) path/to/outpath/seq/
                2) path/to/outpath/struct_0/
                3) path/to/outpath/struct_1/
              If None, Config.py DEFAULT_PARAMETERS["codifiedDataRootDir"] will be used as
              out_Codified_path
    :param feedback_paths: str or str[]. A path to a previous results files directory. Contact maps for
              evaluation will be obtained from this file. If None, contact maps will be loaded from
              contactMaps files contained at features_path/common/contactMaps/
    :param environType: str. Required despite the None default. Must start with:
              "seq" if sequential environment protocol wants to be used (sliding window of pssms...)
              "struct" if VORONOI neighbours environment protocol wants to be used (mean, min, max,
                       sum and count for neighbour residues and their properties),
              "mixed" if one partner will be codified using struct environment and the other partner
                      will be codified using sequence environment. In the "mixed" case, both A_B and
                      B_A will be considered
    :param ncpu: int. Number of processes to use in parallel (each process will codify one complex).
              -1 or any value above the machine's cpu count means "all cpus"; other values below 1
              are raised to 1
    :param overridePrevComp: boolean. If True and there are complexes at out_Codified_path, those
              complexes will be overridden. If False, already computed complexes will be kept and
              codification will continue with non computed complexes
    :param verbose: boolean. Stored as self.verbose
    :raises CodifyComplexException: if environType is None or has none of the accepted prefixes
    '''
    Configuration.__init__(self)

    # The signature default is None, which used to crash on .startswith with an
    # AttributeError; report it through the same exception as any other bad value.
    if environType is None or not environType.startswith(("seq", "struct", "mixed")):
        raise CodifyComplexException(
            "environType must be 'seq' or 'struct' or mixed")
    self.environType = environType

    if features_path is None:
        features_path = self.computedFeatsRootDir
    self.dataRootPath = os.path.realpath(os.path.expanduser(features_path))

    # dataRootPath is already absolute, so no second expanduser is needed here.
    contactMapsDir = os.path.join(self.dataRootPath, "common", "contactMaps")
    try:
        self.prefixes = sorted(getPrefix(elem) for elem in os.listdir(contactMapsDir))
    except (OSError, IOError):
        # Best effort: the contact maps directory may not be readable at this point.
        self.prefixes = None

    if out_Codified_path is None:
        out_Codified_path = self.codifiedDataRootDir
    self.out_Codified_path = myMakeDir(
        os.path.realpath(os.path.expanduser(out_Codified_path)))
    # One sub-directory per environment type.
    self.out_Codified_path = myMakeDir(self.out_Codified_path, environType)

    self.feedback_paths = feedback_paths  # Either a path or None
    self.overridePrevComp = overridePrevComp
    self.verbose = verbose
    self.testingDataPath = os.path.join(self.out_Codified_path, "allInputs")
    self.trainingDataPath = os.path.join(self.out_Codified_path, "sampledInputs")

    # Clamp the worker count to [1, cpu_count]; -1 means "use every cpu".
    availableCpus = multiprocessing.cpu_count()
    if ncpu == -1 or ncpu > availableCpus:
        self.ncpu = availableCpus
    elif ncpu < 1:
        self.ncpu = 1
    else:
        self.ncpu = ncpu
def __init__(self, msg):
    '''
    Initialise the Configuration base first, then the MyException base with msg.

    :param msg: message forwarded unchanged to MyException.__init__
    '''
    # Order kept as in the original: configuration state first, exception state second.
    Configuration.__init__(self)
    MyException.__init__(self, msg)
def __init__(self, nResidues, partnerId):
    '''
    Build the "bad number of residues" message and hand it to MyException.

    :param nResidues: number of residues found; formatted with %d
    :param partnerId: identifier of the offending partner; formatted with %s
    '''
    # Runs first because the limits used in the message are read from self.
    Configuration.__init__(self)
    msgTemplate = ("Bad number of residues for partner %s: %d. "
                   "Number of residues must be %d < nResidues < %d")
    msgValues = (partnerId, nResidues,
                 self.minNumResiduesPartner, self.maxNumResiduesPartner)
    MyException.__init__(self, msgTemplate % msgValues)