def bedSequenceComparator(self, bed_seq1, bed_seq2):
    """Compare two BED sequences by their start index (cmp-style).

    Both sequences must share the same ``species`` and ``chromosom``
    attributes; otherwise they are not comparable.

    Returns:
        ``bed_seq1.indexStart - bed_seq2.indexStart`` (negative, zero or
        positive).

    Raises:
        ExecutionException: if the species or the chromosomes differ.
    """
    error_prefix = ("CoLocationAnalysisProcessor.bedSequenceComparator : "
                    "Unable to compare two BED sequences of differents ")

    same_species = bed_seq1.species == bed_seq2.species
    if not same_species:
        raise ExecutionException(
            error_prefix + "species : '" + bed_seq1.species + "' != '" +
            bed_seq2.species + "'")

    same_chromosom = bed_seq1.chromosom == bed_seq2.chromosom
    if not same_chromosom:
        raise ExecutionException(
            error_prefix + "chromosom : '" + bed_seq1.chromosom + "' != '" +
            bed_seq2.chromosom + "'")

    offset = bed_seq1.indexStart - bed_seq2.indexStart
    return offset
def moveFile(origin_path, destination_path):
    """Move the regular file ``origin_path`` to ``destination_path``.

    When ``destination_path`` does not exist yet it is first created with
    ``FileUtils.createDirectory``; the move itself is done by
    ``shutil.move``.

    Raises:
        ExecutionException: if ``origin_path`` does not exist or is not a
            regular file.
    """
    # Guard clauses: reject invalid origins before touching the file system.
    if not os.path.exists(origin_path):
        raise ExecutionException(
            "FileUtils.moveFile: unable to move file. File does not exist : " +
            origin_path)
    if not os.path.isfile(origin_path):
        raise ExecutionException(
            "FileUtils.moveFile: unable to move file. Provided path is not a file : "
            + origin_path)

    destination_missing = not os.path.exists(destination_path)
    if destination_missing:
        FileUtils.createDirectory(destination_path)
    shutil.move(origin_path, destination_path)
def parseClustalWResult(self, file_path, desired_species_list):
    """Parse a ClustalW result file into a ``SequenceAlignment``.

    Only lines made of exactly two whitespace-separated tokens
    (``<species> <residues>``) are used. The residues of successive lines
    for the same species are concatenated, one list element per character.

    Args:
        file_path: path of the ClustalW output file.
        desired_species_list: species names to keep; ``None`` or an empty
            list keeps every species.

    Returns:
        A ``SequenceAlignment`` holding one sequence per retained species,
        after ``removeFirstAndLastNoInfoColumns`` has been applied.

    Raises:
        ExecutionException: if an IOError occurs while reading the file.
    """
    try:
        result = {}
        length = 0
        # The context manager closes the file even when parsing fails.
        with open(file_path, "r") as clustalw_file:
            for line in clustalw_file:
                tokens = line.split()
                if len(tokens) != 2:
                    continue
                species = tokens[0]
                if desired_species_list and species not in desired_species_list:
                    continue
                if species not in result:
                    result[species] = []
                result[species].extend(tokens[1])
                # 'length' keeps the size of the last sequence touched.
                length = len(result[species])

        result = self.removeFirstAndLastNoInfoColumns(result, length)
        alignment = SequenceAlignment()
        for species in result:
            alignment.addSequence(species, result[species])
        return alignment
    except IOError as io_exce:
        raise ExecutionException(
            "MSAProcessor.parseClustalWResult : Unable to open the ClustalW result file : '"
            + file_path + "'. From:\n\t---> " + str(io_exce))
def execute(self, input_commstructs):
    """Compare the identified motifs of the first input to reference motifs.

    The reference motifs are named in the ``MOTIF_LIST_PARAM`` parameter
    (whitespace-separated) and their matrices are read from the configured
    motif database.

    Args:
        input_commstructs: non-empty sequence; only its first element is
            used.

    Returns:
        The first input comm-struct.

    Raises:
        ExecutionException: if no input is provided.
    """
    if input_commstructs is None or len(input_commstructs) == 0:
        raise ExecutionException(
            "CompareIdentifiedMotifsProcessor.execute : No inputs")
    first_input = input_commstructs[0]

    # Processor parameters
    params = CompareIdentifiedMotifsProcessor
    db_file = self.getParameter(params.MOTIF_DATABASE_FILE_PARAM)
    db_format = self.getParameter(params.MOTIF_DATABASE_FORMAT_PARAM)
    wanted_names = self.getParameter(params.MOTIF_LIST_PARAM).split()

    # PWM of the reference motifs, then the motifs identified upstream
    references = self.getMotifMatrices(wanted_names, db_file, db_format)
    identified = self.getIdentifiedMotifs(first_input)

    self.compareMotifs(references, identified)
    return first_input
def getMotifsDetailsFromTransfac(database_file_path=None):
    """Collect ID / family / type / class details of TRANSFAC motifs.

    Each definition starts on an ``AC`` line (the second token is the motif
    name) and ends on a ``//`` line. Inside a definition, the ``ID`` line
    and the ``CC family:``, ``CC type:`` and ``CC class:`` lines are
    recorded per motif name.

    Args:
        database_file_path: TRANSFAC file to read; defaults to
            ``RSATUtils.RSAT_JASPAR_MOTIF_DATABASE`` when ``None``.

    Raises:
        ExecutionException: if an IOError occurs while reading the file.
    """
    # Local names avoid shadowing the builtins 'id' and 'type'.
    motif_ids = {}
    motif_families = {}
    motif_types = {}
    motif_classes = {}

    if database_file_path is None:
        database_file_path = RSATUtils.RSAT_JASPAR_MOTIF_DATABASE

    try:
        # The context manager closes the database file on every path.
        with open(database_file_path, "r") as database_file:
            for line in database_file:
                # detect the transfac definition starting line
                if line[0:2] != "AC":
                    continue
                motif_name = line.split()[1]
                # The inner loop consumes the same file iterator, reading
                # the definition until its final line ("//").
                for line in database_file:
                    prefix = line[0:2]
                    if prefix == "ID":
                        motif_ids[motif_name] = line.split()[1]
                    if prefix == "CC":
                        sub_tokens = line.split()
                        label = sub_tokens[1].lower()
                        if label == "family:":
                            motif_families[motif_name] = sub_tokens[2]
                        elif label == "type:":
                            motif_types[motif_name] = sub_tokens[2]
                        elif label == "class:":
                            motif_classes[motif_name] = sub_tokens[2]
                    elif prefix == "//":
                        break
    except IOError as io_exce:
        raise ExecutionException(
            "MotifUtils.getMotifsDetailsFromTransfac : Unable to read motif definition from database file '"
            + database_file_path + "'. From:\n\t---> " + str(io_exce))
    # NOTE(review): the code visible here never returns the collected
    # dictionaries - confirm against the complete source which value (and
    # in which order) callers expect.
def getMotifsSizesFromTransfac(database_file_path=None):
    """Compute the number of matrix rows of each TRANSFAC motif.

    Each definition starts on an ``AC`` line (the second token is the motif
    name) and ends on a ``//`` line. The size of a motif is the number of
    lines between its ``PO`` line and the following ``XX`` line. Motifs with
    a size of 0 are not recorded.

    Args:
        database_file_path: TRANSFAC file to read; defaults to
            ``RSATUtils.RSAT_JASPAR_MOTIF_DATABASE`` when ``None``.

    Raises:
        ExecutionException: if an IOError occurs while reading the file.
    """
    sizes = {}

    if database_file_path is None:
        database_file_path = RSATUtils.RSAT_JASPAR_MOTIF_DATABASE

    try:
        # The context manager closes the database file on every path.
        with open(database_file_path, "r") as database_file:
            for line in database_file:
                # detect the transfac definition starting line
                if line[0:2] != "AC":
                    continue
                motif_name = line.split()[1]
                # Reset for every motif: a definition without a "PO" block
                # must not reuse the previous motif's size (nor fail on an
                # unbound variable for the very first motif).
                size = 0
                # The inner loops consume the same file iterator, reading
                # the definition until its final line ("//").
                for line in database_file:
                    if line[0:2] == "PO":
                        # read the values of the PWM and count the lines
                        for line in database_file:
                            if line[0:2] == "XX":
                                break
                            size += 1
                        break
                    elif line[0:2] == "//":
                        break
                # assign the size to the corresponding motif name
                if size != 0:
                    sizes[motif_name] = size
    except IOError as io_exce:
        raise ExecutionException(
            "MotifUtils.getMotifsSizeFromTransfacDefinition : Unable to read motif definition from database file '"
            + database_file_path + "'. From:\n\t---> " + str(io_exce))
    # NOTE(review): the code visible here never returns 'sizes' - confirm
    # against the complete source.
def addColumn(self, values):
    """Append one column to the matrix.

    ``values`` holds one value per letter of ``Constants.DNA_ALPHABET``,
    given in the sorted order of the alphabet. The letter with the highest
    value is appended to the ``Constants.MAX_INDEX`` row; on ties the last
    letter in sorted order wins, and the row receives ``None`` when every
    value is negative. ``self.totalLength`` is incremented by one.

    Raises:
        ExecutionException: if ``values`` does not have one entry per
            alphabet letter.
    """
    alphabet = Constants.DNA_ALPHABET
    if len(values) != len(alphabet):
        raise ExecutionException(
            "PWM.addColumn : Incorrect number of residu values : " +
            str(values))

    # Lazily create one empty row per letter plus the "max letter" row.
    if self.matrix is None:
        self.matrix = {}
        for residue in alphabet:
            self.matrix[residue] = []
        self.matrix[Constants.MAX_INDEX] = []

    best_value = 0
    best_letter = None
    for position, residue in enumerate(sorted(alphabet)):
        current = values[position]
        self.matrix[residue].append(current)
        if current >= best_value:
            best_value, best_letter = current, residue

    self.matrix[Constants.MAX_INDEX].append(best_letter)
    self.totalLength += 1
def execute(self, input_commstructs):
    """Run the motif histogram comparison on all the provided inputs.

    Raises:
        ExecutionException: if no input is provided.
    """
    has_inputs = input_commstructs is not None and len(input_commstructs) > 0
    if not has_inputs:
        raise ExecutionException(
            "CompareStatisticsProcessor.execute : No inputs")

    self.compareMotifHistogram(input_commstructs)
def execute(self, input_commstructs):
    """Compute the contingency table of the first input.

    The processor output directory is recreated from scratch, the three
    contingency attributes of every motif statistics object are reset to 0,
    then ``computeContingencyTable`` is run against the reference motif.

    Args:
        input_commstructs: non-empty sequence; only its first element is
            used.

    Returns:
        The comm-struct returned by ``computeContingencyTable``.

    Raises:
        ExecutionException: if no input is provided.
    """
    if input_commstructs is None or len(input_commstructs) == 0:
        raise ExecutionException("ContingencyProcessor.execute : No inputs")
    commstruct = input_commstructs[0]

    # Processor parameters
    reference_motif = self.getParameter(
        ContingencyProcessor.REFERENCE_MOTIF_PARAM)

    # Start from an empty output directory (removal errors are ignored).
    out_dir = os.path.join(self.component.outputDir,
                           self.component.getComponentPrefix())
    shutil.rmtree(out_dir, True)
    FileUtils.createDirectory(out_dir, 0o777)

    # Reset the contingency statistics of every motif
    counters = (
        MotifStatistics.CONTIGENCY_MOTIF_COOCCURENCE,
        MotifStatistics.CONTIGENCY_REFERENCE_MOTIF_BEDSEQ,
        MotifStatistics.CONTINGENCY_TOTAL_BEDSEQ,
    )
    for statistics in input_commstruct_values(commstruct):
        for counter in counters:
            statistics.setAttribute(counter, 0)

    return self.computeContingencyTable(commstruct, reference_motif)


def input_commstruct_values(commstruct):
    """Return the motif statistics objects of ``commstruct`` in key order."""
    motif_statistics = commstruct.motifStatistics
    return [motif_statistics[name] for name in motif_statistics.keys()]
def initServerQueue(self):
    """Reload the server queue from its persistence file, if it exists.

    Each non-blank, non-comment line holds up to five fields separated by
    ``|**|``: pipelines file path, options (a Python literal), verbosity,
    resume flag and working directory. Missing trailing fields keep the
    defaults ``[None, None, 0, "True", None]``. Every parsed command is
    re-queued through ``addToQueue``.

    Raises:
        ExecutionException: if an IOError occurs while reading the file.
        ValueError: from ``ast.literal_eval`` when the options field of a
            command is missing or is not a valid literal.
    """
    queue_file_path = os.path.join(self.config[PFConstants.QUEUE_DIR_PARAM],
                                   PFConstants.SERVER_QUEUE_FILE_NAME)
    if not os.path.exists(queue_file_path):
        return

    try:
        commands_list = []
        queue_file = FileUtils.openFile(queue_file_path)
        try:
            for line in queue_file:
                if line.isspace() or line[0] == PFConstants.COMMENT_CHAR:
                    continue
                tokens = line.split("|**|")
                if 0 < len(tokens) <= 5:
                    command_params = [None, None, 0, "True", None]
                    command_params[:len(tokens)] = tokens
                    commands_list.append(command_params)
        finally:
            # Close the file even when reading fails.
            queue_file.close()

        for command_params in commands_list:
            # Options are parsed per command. Parsing them once after the
            # read loop applied the last line's options to every command
            # and failed on an empty file.
            options = ast.literal_eval(command_params[1])
            self.addToQueue(command_params[0], options, command_params[2],
                            command_params[3], command_params[4])
    except IOError as io_exce:
        raise ExecutionException(
            " PipelineManager.initServerQueue : Unable to read Server queue from file : "
            + queue_file_path + ". From:\n\t---> " + str(io_exce))
def execute(self, input_commstructs):
    """Produce the final output of the pipeline from the first input.

    Resolves the motif database files (standard list plus the optional
    custom file), recreates the processor output directory, analyses the
    motif statistics, creates the logos, writes the classification and
    copies the progression XSL/XML files into the output directory.

    Args:
        input_commstructs: non-empty sequence; only its first element is
            used.

    Raises:
        ExecutionException: if no input is provided or if no motif database
            file is specified.
    """
    if input_commstructs is None or len(input_commstructs) == 0:
        raise ExecutionException("FinalOutputProcessor.execute : No inputs")
    commstruct = input_commstructs[0]

    # Processor parameters
    self.dbPath = self.getParameter(
        FinalOutputProcessor.MOTIF_DATABASE_PATH_PARAM)

    # List of motif database files to use (mandatory)
    db_line = self.getParameter(
        FinalOutputProcessor.MOTIF_DATABASE_FILE_LIST_PARAM)
    if db_line is None or db_line.isspace():
        raise ExecutionException(
            "FinalOutputProcessor.getMethodParameters : No motif database file specified in parameter '"
            + FinalOutputProcessor.MOTIF_DATABASE_FILE_LIST_PARAM + "'")
    self.dbFiles = []
    for db_name in db_line.split():
        self.dbFiles.append(os.path.join(self.dbPath, db_name))

    # Optional custom motif database file
    custom_db_line = self.getParameter(
        FinalOutputProcessor.CUSTOM_MOTIF_DATABASE_FILE_PARAM, False)
    if not (custom_db_line is None or custom_db_line.isspace()):
        self.dbFiles.append(custom_db_line)

    # Optional display limit, 1.0 when not configured
    limit_value = self.getParameter(FinalOutputProcessor.DISPLAY_LIMIT_VALUE,
                                    False)
    if limit_value is None:
        limit_value = 1.0

    # Start from an empty output directory (removal errors are ignored).
    component = self.component
    self.outPath = os.path.join(component.outputDir,
                                component.getComponentPrefix())
    shutil.rmtree(self.outPath, True)
    FileUtils.createDirectory(self.outPath, 0o777)

    # Motif graph and stats files, logos, then the classification itself
    analysis = self.AnalyseMotifStats(commstruct)
    self.createLogos(commstruct)
    self.outputClassification(commstruct, analysis, limit_value)

    # Copy the progression files next to the results
    for progression_file in (Constants.PROGRESSION_XSL_FILE,
                             Constants.PROGRESSION_XML_FILE):
        FileUtils.copyFile(
            os.path.join(self.component.outputDir, progression_file),
            self.outPath)
def computeBlockRatio(self, index_start, index_end, pwm):
    """Return the ratio of conserved positions in [index_start, index_end).

    The computation depends on ``self.algorithm``:

    * occurrence ratio: a position counts when the ratio of its most
      conserved residue is >= ``self.residuConservationLimit``;
    * information ratio: a position counts when the information of its
      max-information letter, normalised between that letter's information
      limits, is > ``self.residuConservationLimit``.

    In both modes the whole window is rejected (0.0 is returned) as soon as
    a non-counting position has no most-conserved residue, or has more
    missing residues ("-") than occurrences of its most conserved residue.

    Returns:
        The number of counting positions divided by the window length, or
        0.0 when the window is rejected.

    Raises:
        ExecutionException: if ``self.algorithm`` is not a known algorithm.

    NOTE(review): the nesting of the ``else: return 0.0`` branches was
    reconstructed from a flattened source - confirm against the original.
    NOTE(review): an empty window (index_end == index_start) looks like it
    divides by zero - confirm callers never pass one.
    """
    # Algorithm DirectRatio : Window ratio = ratio of position having a minimum max ratio
    if self.algorithm == BlockProcessor.ALGORITHM_OCCURENCE_RATIO_VALUE:
        window_ratio = 0
        for index in range(index_start, index_end):
            letter_max = pwm.getMostConservedResidu(index)
            if letter_max != None:
                max_ratio = pwm.ratioMatrix[letter_max][index]
                if max_ratio >= self.residuConservationLimit:
                    window_ratio += 1
                else:
                    # If the number of "-" is greater than the number of occurence of the most conserved letter
                    # the window is considered as not conserved
                    sum = 0
                    for letter in Constants.DNA_ALPHABET:
                        sum += pwm.matrix[letter][index]
                    # pwm.nbSequences - sum is used as the count of "-" at this position
                    if (pwm.nbSequences - sum) > pwm.matrix[letter_max][index]:
                        return 0.0
            else:
                # No most-conserved residue at this position: reject the window
                return 0.0
        return window_ratio / float(index_end - index_start)
    elif self.algorithm == BlockProcessor.ALGORITHM_INFORMATION_RATIO_VALUE:
        window_ratio = 0
        for index in range(index_start, index_end):
            letter_max = pwm.informationMatrix[Constants.MAX_INDEX][index]
            max_info = pwm.informationMatrix[letter_max][index]
            # Normalise the information between the letter's two limits
            info_ratio = (max_info - pwm.informationLimits[letter_max][0] ) / float(pwm.informationLimits[letter_max][1] - pwm.informationLimits[letter_max][0])
            if info_ratio > self.residuConservationLimit:
                window_ratio += 1
            else:
                # If the number of "-" is greater than the number of occurence of the most conserved letter
                # the window is considered as not conserved
                letter_max = pwm.getMostConservedResidu(index)
                if letter_max != None:
                    sum = 0
                    for letter in Constants.DNA_ALPHABET:
                        sum += pwm.matrix[letter][index]
                    # If the number of "-" is greater than the number of occurence of the most conserved letter
                    # the window is considered as not conserved
                    if (pwm.nbSequences - sum) > pwm.matrix[letter_max][index]:
                        return 0.0
                else:
                    # No most-conserved residue at this position: reject the window
                    return 0.0
        return window_ratio / float(index_end - index_start)
    else:
        raise ExecutionException( "BlockProcessor.computeBlockRatio : No known algorithm with name : " + self.algorithm)
def getInputCommStructs(self):
    """Load the input comm-structs this component can work on.

    Nothing is loaded when ``getAuthorizedInputClasses`` returns ``None``.
    Otherwise:

    * without an ``INPUT_FILE_PARAM`` parameter, the output file of every
      previous component is loaded with that component's result class,
      which must be one of the authorized classes;
    * with an input file, each authorized class is tried in turn to load
      the file; failures are traced and ignored, but at least one class
      must succeed.

    Raises:
        ExecutionException: if a previous component produces a class that
            is not authorized, or if the input file cannot be loaded with
            any authorized class.
    """
    authorized_input_classes = self.getAuthorizedInputClasses()
    input_commstructs = []

    if authorized_input_classes is not None:
        input_file = self.getParameter(Component.INPUT_FILE_PARAM, False)
        if input_file is None:
            # Compare the list of authorized inputs to the outputs of the
            # previous components
            for component in self.previousComponents:
                previous_result_class = component.resultClass
                if previous_result_class not in authorized_input_classes:
                    # str() is required: concatenating the class object
                    # itself raised a TypeError that hid this error.
                    raise ExecutionException(
                        "Component.getInputCommStructs : input is not of the right class. Class is '"
                        + str(previous_result_class) +
                        "' but waited classes are " +
                        str(authorized_input_classes))
                input_commstruct = previous_result_class.fromXMLFile(
                    component.getOutputFilePath())
                if input_commstruct is not None:
                    input_commstructs.append(input_commstruct)
        else:
            # Try to read the input file using each class authorized as input
            for input_class in authorized_input_classes:
                try:
                    Log.trace(
                        "Component.getInputCommStructs : Trying to load data from file : "
                        + input_file)
                    input_commstruct = input_class.fromXMLFile(input_file)
                    if input_commstruct is not None:
                        input_commstructs.append(input_commstruct)
                        Log.trace(
                            "Component.getInputCommStructs : Data correctly loaded"
                        )
                except Exception as exce:
                    # Best effort: a class that cannot read the file is
                    # simply skipped, the next one is tried.
                    Log.trace(
                        "Component.getInputCommStructs : Data not loaded using class '"
                        + str(input_class) + "' : " + str(exce))
            if len(input_commstructs) == 0:
                raise ExecutionException(
                    "Component.getInputCommStructs : The provided input file does not contain information the processor '"
                    + self.processorName + "' can manage : " + input_file)
    # NOTE(review): the code visible here never returns 'input_commstructs'
    # - confirm against the complete source.
def getAttributeAsfloat(self, att_name, mandatory=False):
    """Return the attribute ``att_name`` converted to ``float``.

    Args:
        att_name: name of the attribute to read with ``getAttribute``.
        mandatory: when true, a value that cannot be converted is an error;
            otherwise 0 is returned instead.

    Raises:
        ExecutionException: if the value cannot be converted and
            ``mandatory`` is true.
    """
    try:
        return float(self.getAttribute(att_name))
    except (TypeError, ValueError) as val_exce:
        if not mandatory:
            return 0
        raise ExecutionException(
            "MotifStatistics.getAttributeAsint : Unable to convert the value of attribute :'"
            + att_name + "'. From:\n\t---> " + str(val_exce))
def outputAlignmentToFASTAFile(self, alignment, file_path, desired_species_list):
    """Write ``alignment`` to ``file_path`` in FASTA format.

    Args:
        alignment: object whose ``convertToFASTA(desired_species_list)``
            returns the text to write.
        file_path: destination file; it is created or overwritten.
        desired_species_list: forwarded unchanged to ``convertToFASTA``.

    Raises:
        ExecutionException: if an IOError occurs while writing the file.
    """
    try:
        # The context manager closes the file even when the conversion or
        # the write fails.
        with open(file_path, "w") as fasta_file:
            fasta_file.write(alignment.convertToFASTA(desired_species_list))
    except IOError as io_exce:
        raise ExecutionException(
            "MSAProcessor.outputAlignmentToFASTAFile : Unable to save alignment to FASAT file : '"
            + file_path + "'. From \n\t" + str(io_exce))
def execute(self, input_commstructs):
    """Run the configured multiple sequence alignment method.

    The method name is read from ``MSAProcessor.METHOD_PARAM`` and compared
    in lower case against the ClustalW and MAFFT method values.

    Args:
        input_commstructs: non-empty sequence; only its first element is
            used.

    Returns:
        The first input comm-struct.

    Raises:
        ExecutionException: if no input is provided or if the configured
            method is unknown.
    """
    if input_commstructs is None or len(input_commstructs) == 0:
        raise ExecutionException("MAFProcessor.execute : No inputs")
    commstruct = input_commstructs[0]

    # Processor parameters
    method = self.getParameter(MSAProcessor.METHOD_PARAM).lower()

    # Dispatch on the requested method
    if method == MSAProcessor.METHOD_VALUE_CLUSTALW:
        self.executeClustalW(commstruct)
        return commstruct
    if method == MSAProcessor.METHOD_VALUE_MAFFT:
        self.executeMAFFT(commstruct)
        return commstruct

    raise ExecutionException(
        "MSAProcessor.execute : required method is not known : " + method)
def outputTable(table, path):
    """Write every item of ``table`` to ``path``, one per line.

    Each item is converted with ``str``. The file is opened through
    ``FileUtils.openFile`` in write mode.

    Raises:
        ExecutionException: if an IOError occurs while writing the file.
    """
    try:
        out_file = FileUtils.openFile(path, "w")
        try:
            for number in table:
                out_file.write(str(number) + "\n")
        finally:
            # close() flushes pending data and runs even when a write fails.
            out_file.close()
    except IOError as io_exce:
        raise ExecutionException(
            "HistogramProcessor.outputMotifStatistics : Unable to build statistics out_file. From:\n\t---> "
            + str(io_exce))
def outputMotifStatistics(self, statistics, path):
    """Write every item of ``statistics`` to ``path``, one per line.

    Each item is converted with ``str``; the file is created or
    overwritten.

    Raises:
        ExecutionException: if an IOError occurs while writing the file.
    """
    try:
        # The context manager flushes and closes the file even when a
        # write fails.
        with open(path, "w") as stats_file:
            for number in statistics:
                stats_file.write(str(number) + "\n")
    except IOError as io_exce:
        raise ExecutionException(
            "HistogramProcessor.outputMotifStatistics : Unable to build statistics file. From:\n\t---> "
            + str(io_exce))
def getParameterAsint(self, param_name, mandatory=True):
    """Return the parameter ``param_name`` converted to ``int``.

    Args:
        param_name: name of the parameter, read with ``getParameter``.
        mandatory: forwarded to ``getParameter``; when true, a value that
            cannot be converted is an error, otherwise ``None`` is returned.

    Raises:
        ExecutionException: if the value cannot be converted and
            ``mandatory`` is true.
    """
    try:
        raw_value = self.getParameter(param_name, mandatory)
        return int(raw_value)
    except (TypeError, ValueError) as val_exce:
        if not mandatory:
            return None
        raise ExecutionException(
            "Processor.getParameterAsint : Unable to convert the value of parameter :'"
            + param_name + "'. From:\n\t---> " + str(val_exce))
def getParameter(self, param_name, mandatory=True):
    """Return the value of the processor parameter ``param_name``.

    Args:
        param_name: key looked up in ``self.parameters``.
        mandatory: when true, a missing parameter is an error; otherwise
            ``None`` is returned.

    Raises:
        ExecutionException: if the lookup fails (unknown key, or
            ``self.parameters`` not subscriptable) and ``mandatory`` is
            true.
    """
    try:
        return self.parameters[param_name]
    except (TypeError, KeyError) as key_exce:
        if not mandatory:
            return None
        raise ExecutionException(
            "Processor.getParameter : Processor does not know the parameter :'"
            + param_name + "'. From:\n\t---> " + str(key_exce))
def generateRandomSites(self, motif, motif_file_path, site_number):
    """Generate random sites for ``motif`` with the RSAT random-sites tool.

    Runs ``<RSAT dir>/python-scripts/random-sites`` with the motif file, the
    requested number of sites and an output file named
    ``<motif>_sites.fasta`` in the component output directory, then reads
    the sites back: the first token of every non-blank line that is not a
    FASTA header (``>``), upper-cased.

    Args:
        motif: motif name, used to build the output file name.
        motif_file_path: path of the motif definition passed with ``-m``.
        site_number: number of sites passed with ``-n``.

    Raises:
        ExecutionException: if the command returns a non-zero status or if
            an IOError occurs while reading the output file.
    """
    # Retrieve method required parameters
    RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)
    dir_path = os.path.join(self.component.outputDir,
                            self.component.getComponentPrefix())
    output_path = os.path.join(dir_path, motif + "_sites.fasta")

    # Build the RSAT random-sites command
    # NOTE(review): the command is a shell string built from paths; quote
    # or validate them if they can ever come from untrusted input.
    cmd = os.path.join(RSAT_PATH, "python-scripts/random-sites")
    cmd += " -m " + motif_file_path
    cmd += " -n " + str(site_number)
    cmd += " -o " + output_path

    # Execute the command
    cmd_result = commands.getstatusoutput(cmd)
    if cmd_result[0] != 0:
        Log.log("ImplantSitesProcessor.generateSites : status returned is :" +
                str(cmd_result[0]) + " for command '" + cmd + "'")
        Log.log("ImplantSitesProcessor.generateSites : command output is = \n" +
                str(cmd_result[1]))
        raise ExecutionException(
            "ImplantSitesProcessor.generateSites : Cannot execute random-sites commands. See logs for more details"
        )

    # Parse the result of the command
    sites = []
    try:
        # The context manager closes the file even when parsing fails.
        with open(output_path, "r") as site_file:
            for line in site_file:
                if not line.isspace() and line[0] != ">":
                    sites.append(line.split()[0].upper())
    except IOError as io_exce:
        raise ExecutionException(
            "ImplantSitesProcessor.generateSites : Unable to read motif sites from file '"
            + output_path + "'. From:\n\t---> " + str(io_exce))
    # NOTE(review): the code visible here never returns or uses 'sites' -
    # confirm against the complete source.
def outputConfig(self):
    """Write the component parameters to its config file.

    One ``name=value`` line is written per entry of ``self.parameters``, to
    the path returned by ``getConfigFilePath``.

    Raises:
        ExecutionException: if an IOError occurs while opening or writing
            the file.
    """
    # Resolved before the 'try' so that the error message below can always
    # refer to it.
    output_path = self.getConfigFilePath()
    try:
        config_file = FileUtils.openFile(output_path, "w")
        try:
            for param, value in self.parameters.items():
                config_file.write(param + "=" + value + "\n")
        finally:
            # close() flushes pending data and runs even when a write fails.
            config_file.close()
    except IOError as io_exce:
        raise ExecutionException(
            "Component.outputConfig : Unable to write component config in file '"
            + output_path + "'. From:\n\t---> " + str(io_exce))
def removeFirstInQueue(self):
    """Drop the first element of the server queue and persist the queue.

    The operation runs while holding ``self.serverQueueLock``. Nothing is
    done when the queue is empty.

    Raises:
        ExecutionException: if ``outputServerQueue`` fails.
    """
    self.serverQueueLock.acquire()
    try:
        if len(self.serverQueue) > 0:
            self.serverQueue = self.serverQueue[1:]
            try:
                self.outputServerQueue()
            except ExecutionException as exe_exce:
                raise ExecutionException(
                    " PipelineManager.removeFirstInQueue : Unable to remove first element in server queue. From:\n\t---> "
                    + str(exe_exce))
    finally:
        # Always give the lock back, otherwise the next queue operation
        # would block forever.
        self.serverQueueLock.release()
def getParameter(self, param_name, mandatory=True):
    """Return the value of the config parameter ``param_name``.

    Args:
        param_name: key looked up in ``self.config``.
        mandatory: when true, a missing parameter is an error; otherwise
            ``None`` is returned.

    Raises:
        ExecutionException: if the lookup fails (unknown key, or
            ``self.config`` not subscriptable) and ``mandatory`` is true.
    """
    try:
        return self.config[param_name]
    except (TypeError, KeyError) as key_exce:
        if not mandatory:
            return None
        raise ExecutionException(
            "PipelineManager.getParameter : Config parameter :'" + param_name +
            "' does not exists. From:\n\t---> " + str(key_exce))
def prepareOutputDir(self):
    """Recreate the processor output directory.

    The directory ``<component output dir>/<component prefix>`` is removed
    (removal errors are ignored) and created again with mode 0777.

    Returns:
        A ``(dir_path, file_path)`` tuple where ``file_path`` is the file
        named ``motif`` inside ``dir_path``.

    Raises:
        ExecutionException: if an IOError occurs while preparing the
            directory.
    """
    try:
        component = self.component
        dir_path = os.path.join(component.outputDir,
                                component.getComponentPrefix())
        shutil.rmtree(dir_path, True)
        FileUtils.createDirectory(dir_path, 0o777)
        return (dir_path, os.path.join(dir_path, "motif"))
    except IOError as io_exce:
        raise ExecutionException(
            "MSAProcessor.prepareOutputDir : Unable to create output directory for FASTA file export : '"
            + dir_path + "'. From \n\t" + str(io_exce))
def mergeMotifs(self, master_motif, added_motif):
    """Merge ``added_motif`` into the contiguous ``master_motif``.

    The added motif must end exactly where the master starts (it is then
    merged before it) or start exactly where the master ends (merged after
    it). The master's bounds are extended, its name is recomposed from
    ``self.name`` and the two matrices are merged.

    Raises:
        ExecutionException: if the two motifs are not contiguous.
    """
    merge_before = master_motif.indexStart == added_motif.indexEnd
    merge_after = (not merge_before
                   and master_motif.indexEnd == added_motif.indexStart)

    if not (merge_before or merge_after):
        raise ExecutionException(
            "SequenceAlignement.mergeMotifs : The two given motifs are not contiguous"
        )

    if merge_before:
        master_motif.indexStart = added_motif.indexStart
    else:
        master_motif.indexEnd = added_motif.indexEnd

    master_motif.composeName(self.name)
    master_motif.pwm.mergeMatrix(added_motif.pwm, merge_after)
def outputMotifDefinition(self, motif, dir_path):
    """Write the matrix of ``motif`` to ``<dir_path>/<motif>.tab``.

    The content written is ``motif.pwm.convertToHorizontaltab()``.

    Raises:
        ExecutionException: if an IOError occurs while writing the file.
    """
    # NOTE(review): 'motif' is both concatenated with a string and accessed
    # for its '.pwm' attribute - confirm which type callers pass.
    file_path = os.path.join(dir_path, motif + ".tab")
    definition = motif.pwm.convertToHorizontaltab()
    try:
        # The context manager flushes and closes the file even when the
        # write fails.
        with open(file_path, "w") as motif_file:
            motif_file.write(definition)
    except IOError as io_exce:
        raise ExecutionException(
            "ImplantSitesProcessor.outputMotifDefinition : Unable to write motif definition to file '"
            + file_path + "'. From:\n\t---> " + str(io_exce))
def addToQueue(self, pipelines_filepath, options, verbosity, resume, working_dir):
    """Append a pipeline run request to the server queue and persist it.

    The request is stored as the tuple ``(pipelines_filepath, options,
    verbosity, resume, working_dir)``. The operation runs while holding
    ``self.serverQueueLock``. Nothing is done when ``pipelines_filepath``
    is ``None`` or empty.

    Raises:
        ExecutionException: if ``outputServerQueue`` fails.
    """
    self.serverQueueLock.acquire()
    try:
        if pipelines_filepath is not None and len(pipelines_filepath) > 0:
            self.serverQueue.append(
                (pipelines_filepath, options, verbosity, resume, working_dir))
            try:
                self.outputServerQueue()
            except ExecutionException as exe_exce:
                raise ExecutionException(
                    " PipelineManager.addToQueue : Unable to add element in server queue. From:\n\t---> "
                    + str(exe_exce))
    finally:
        # Always give the lock back, otherwise the next queue operation
        # would block forever.
        self.serverQueueLock.release()
def getMotifsNumberFromTransfac(database_file_path=None):
    """Count the motif definitions of a TRANSFAC database file.

    A definition is counted for every line starting with ``AC``.

    Args:
        database_file_path: TRANSFAC file to read; defaults to
            ``RSATUtils.RSAT_JASPAR_MOTIF_DATABASE`` when ``None``.

    Raises:
        ExecutionException: if an IOError occurs while reading the file.
    """
    motif_count = 0

    if database_file_path is None:
        database_file_path = RSATUtils.RSAT_JASPAR_MOTIF_DATABASE

    try:
        # The context manager closes the database file on every path.
        with open(database_file_path, "r") as database_file:
            for line in database_file:
                # detect the transfac definition starting line
                if line[0:2] == "AC":
                    motif_count += 1
    except IOError as io_exce:
        raise ExecutionException(
            "MotifUtils.getMotifsNumberFromTransfac : Unable to read motif definition from database file '"
            + database_file_path + "'. From:\n\t---> " + str(io_exce))
    # NOTE(review): the code visible here never returns 'motif_count' -
    # confirm against the complete source.
def openFile(path, mode="r", chmod=0o666):
    """Open ``path`` and return the file object.

    When the path does not exist before the call, the permissions of the
    file are set to ``chmod`` right after it has been opened.

    Args:
        path: path of the file to open.
        mode: mode passed to the builtin ``open``.
        chmod: permission bits applied to a newly created file.

    Raises:
        ExecutionException: if ``path`` exists but is not a regular file.
    """
    existed_before = os.path.exists(path)
    if existed_before and not os.path.isfile(path):
        raise ExecutionException(
            "FileUtils.openFile: unable to open file. Provided path is not a file : "
            + path)

    result_file = open(path, mode)
    if not existed_before:
        os.chmod(path, chmod)
    return result_file