def bedSequenceComparator(self, bed_seq1, bed_seq2):
    """Compare two BED sequences by their start index.

    Returns ``bed_seq1.indexStart - bed_seq2.indexStart``: negative when
    the first sequence starts before the second, zero when both start at
    the same index, positive otherwise.

    Raises ExecutionException when the two sequences do not share the same
    ``species`` or the same ``chromosom``.
    """
    # Attributes that must match, checked in this order, each paired with
    # the beginning of its error message.
    required_matches = (
        ("species",
         "CoLocationAnalysisProcessor.bedSequenceComparator : Unable to compare two BED sequences of differents species : '"),
        ("chromosom",
         "CoLocationAnalysisProcessor.bedSequenceComparator : Unable to compare two BED sequences of differents chromosom : '"),
    )
    for attribute, message_start in required_matches:
        first_value = getattr(bed_seq1, attribute)
        second_value = getattr(bed_seq2, attribute)
        if first_value != second_value:
            raise ExecutionException(message_start + first_value + "' != '" + second_value + "'")

    return bed_seq1.indexStart - bed_seq2.indexStart
Exemplo n.º 2
0
    def moveFile(origin_path, destination_path):
        """Move the file 'origin_path' to 'destination_path'.

        When 'destination_path' does not exist yet it is first created
        through FileUtils.createDirectory, then the file is moved with
        shutil.move.

        Raises ExecutionException when 'origin_path' does not exist or is
        not a regular file.
        """
        if not os.path.exists(origin_path):
            raise ExecutionException(
                "FileUtils.moveFile: unable to move file. File does not exist : "
                + origin_path)

        if not os.path.isfile(origin_path):
            raise ExecutionException(
                "FileUtils.moveFile: unable to move file. Provided path is not a file : "
                + origin_path)

        destination_missing = not os.path.exists(destination_path)
        if destination_missing:
            FileUtils.createDirectory(destination_path)

        shutil.move(origin_path, destination_path)
Exemplo n.º 3
0
    def parseClustalWResult(self, file_path, desired_species_list):
        """Parse a ClustalW result file into a SequenceAlignment.

        Every line made of exactly two whitespace-separated tokens is read
        as '<species> <residues>'. The residues of a species are
        accumulated, one list item per character, over all its lines.

        file_path -- path of the ClustalW result file
        desired_species_list -- species to retain; None or an empty list
            retains every species found in the file

        The accumulated sequences are passed to
        self.removeFirstAndLastNoInfoColumns together with the length of
        the sequence updated last, and the result is loaded into a new
        SequenceAlignment, which is returned.

        Raises ExecutionException when an IOError occurs.
        """
        try:
            residues_by_species = {}
            length = 0
            # 'with' releases the handle even when reading fails half-way
            with open(file_path, "r") as result_file:
                for line in result_file:
                    tokens = line.split()
                    if len(tokens) != 2:
                        continue
                    species = tokens[0]
                    if desired_species_list and species not in desired_species_list:
                        continue
                    # extending a list with a string adds its characters one by one
                    residues_by_species.setdefault(species, []).extend(tokens[1])
                    length = len(residues_by_species[species])

            result = self.removeFirstAndLastNoInfoColumns(residues_by_species,
                                                          length)

            alignment = SequenceAlignment()
            for species in result:
                alignment.addSequence(species, result[species])
            return alignment

        except IOError as io_exce:
            raise ExecutionException(
                "MSAProcessor.parseClustalWResult : Unable to open the ClustalW result file : '"
                + file_path + "'. From:\n\t---> " + str(io_exce))
    def execute(self, input_commstructs):
        """Compare the motifs identified in the first input with reference motifs.

        The reference motif names are read from the MOTIF_LIST_PARAM
        parameter (whitespace-separated) and their matrices are obtained
        from self.getMotifMatrices using the database file and database
        format parameters. The identified motifs come from
        self.getIdentifiedMotifs and both sets are handed to
        self.compareMotifs.

        Returns the first element of 'input_commstructs'.
        Raises ExecutionException when 'input_commstructs' is None or empty.
        """
        if input_commstructs is None or not len(input_commstructs):
            raise ExecutionException(
                "CompareIdentifiedMotifsProcessor.execute : No inputs")

        input_commstruct = input_commstructs[0]

        # Processor parameters
        processor = CompareIdentifiedMotifsProcessor
        database_file = self.getParameter(processor.MOTIF_DATABASE_FILE_PARAM)
        database_format = self.getParameter(
            processor.MOTIF_DATABASE_FORMAT_PARAM)
        motif_name_list = self.getParameter(processor.MOTIF_LIST_PARAM).split()

        # PWM of the reference motifs
        reference_motif_list = self.getMotifMatrices(
            motif_name_list, database_file, database_format)

        # Identified motifs are retrieved, then compared to the references
        self.compareMotifs(reference_motif_list,
                           self.getIdentifiedMotifs(input_commstruct))

        return input_commstruct
Exemplo n.º 5
0
    def getMotifsDetailsFromTransfac( database_file_path = None):
        """Read identifier, family, type and class of the motifs of a TRANSFAC file.

        A motif definition starts on a line beginning with "AC" (the second
        token is the motif name) and ends on a line beginning with "//".
        Inside a definition:
          - an "ID" line gives the motif identifier (second token);
          - a "CC" line whose second token is "family:", "type:" or
            "class:" (case-insensitive) gives the matching detail (third
            token). Other "CC" lines are ignored.

        database_file_path -- path of the database file; defaults to
            RSATUtils.RSAT_JASPAR_MOTIF_DATABASE when None

        Raises ExecutionException when the file cannot be read.
        """
        motif_ids = {}
        families = {}
        motif_types = {}
        classes = {}
        # "CC" annotation keyword -> dictionary receiving its value
        annotation_targets = {
            "family:": families,
            "type:": motif_types,
            "class:": classes,
        }

        if database_file_path is None:
            database_file_path = RSATUtils.RSAT_JASPAR_MOTIF_DATABASE

        try:
            # 'with' guarantees the database file is closed
            with open(database_file_path, "r") as database_file:
                for line in database_file:
                    # detect the transfac definition starting line
                    if line[0:2] != "AC":
                        continue
                    motif_name = line.split()[1]
                    # The inner loop consumes the same file iterator, so the
                    # outer loop resumes right after the final "//" line.
                    for line in database_file:
                        prefix = line[0:2]
                        if prefix == "ID":
                            motif_ids[motif_name] = line.split()[1]
                        elif prefix == "CC":
                            sub_tokens = line.split()
                            # free-text comments may hold fewer than 3 tokens
                            if len(sub_tokens) >= 3:
                                target = annotation_targets.get(
                                    sub_tokens[1].lower())
                                if target is not None:
                                    target[motif_name] = sub_tokens[2]
                        elif prefix == "//":
                            break
        except IOError as io_exce:
            raise ExecutionException( "MotifUtils.getMotifsDetailsFromTransfac : Unable to read motif definition from database file '" + database_file_path + "'. From:\n\t---> " + str( io_exce))
        # NOTE(review): the visible code never returns the four dictionaries
        # built above - confirm whether a return statement was lost.
Exemplo n.º 6
0
    def getMotifsSizesFromTransfac( database_file_path = None):
        """Compute the size of every motif matrix of a TRANSFAC file.

        A motif definition starts on a line beginning with "AC" (the second
        token is the motif name) and ends on a line beginning with "//".
        The size of a motif is the number of lines found between its "PO"
        line and the next line beginning with "XX". Motifs without any
        matrix line are not recorded.

        database_file_path -- path of the database file; defaults to
            RSATUtils.RSAT_JASPAR_MOTIF_DATABASE when None

        Raises ExecutionException when the file cannot be read.
        """
        sizes = {}

        if database_file_path is None:
            database_file_path = RSATUtils.RSAT_JASPAR_MOTIF_DATABASE

        try:
            # 'with' guarantees the database file is closed
            with open(database_file_path, "r") as database_file:
                for line in database_file:
                    # detect the transfac definition starting line
                    if line[0:2] != "AC":
                        continue
                    motif_name = line.split()[1]
                    # Reset for each motif: a definition without "PO" section
                    # must neither fail on an unset counter nor inherit the
                    # size of the previous motif.
                    size = 0
                    # All loops share the same file iterator: each one
                    # resumes where the previous one stopped.
                    for line in database_file:
                        if line[0:2] == "PO":
                            # count the PWM lines up to the "XX" separator
                            for line in database_file:
                                if line[0:2] == "XX":
                                    break
                                size += 1
                            break
                        elif line[0:2] == "//":
                            break
                    # assign the size to the corresponding motif name
                    if size != 0:
                        sizes[motif_name] = size
        except IOError as io_exce:
            raise ExecutionException( "MotifUtils.getMotifsSizesFromTransfac : Unable to read motif definition from database file '" + database_file_path + "'. From:\n\t---> " + str( io_exce))
        # NOTE(review): the visible code never returns 'sizes' - confirm
        # whether a return statement was lost.
Exemplo n.º 7
0
    def addColumn(self, values):
        """Append one column to the matrix.

        values -- one value per letter of Constants.DNA_ALPHABET, given in
            the sorted order of the alphabet letters

        The matrix is created on the first call, with one empty list per
        letter plus one under Constants.MAX_INDEX. For the new column, the
        letter stored under Constants.MAX_INDEX is the one holding the
        highest value; on ties the last letter in sorted order wins, and
        None is stored when every value is below 0. 'totalLength' is
        incremented by one.

        Raises ExecutionException when the number of values differs from
        the number of letters of the alphabet.
        """
        if len(values) != len(Constants.DNA_ALPHABET):
            raise ExecutionException(
                "PWM.addColumn : Incorrect number of residu values : " +
                str(values))

        # Lazy creation of the matrix rows
        if self.matrix is None:
            self.matrix = dict(
                (letter, []) for letter in Constants.DNA_ALPHABET)
            self.matrix[Constants.MAX_INDEX] = []

        best_value = 0
        best_letter = None
        for position, letter in enumerate(sorted(Constants.DNA_ALPHABET)):
            value = values[position]
            self.matrix[letter].append(value)
            # '>=' : a later letter replaces an earlier one on equal values
            if value >= best_value:
                best_value, best_letter = value, letter

        self.matrix[Constants.MAX_INDEX].append(best_letter)

        self.totalLength += 1
    def execute(self, input_commstructs):
        """Run the motif histogram comparison on all the given inputs.

        Delegates to self.compareMotifHistogram with the whole list.
        Raises ExecutionException when 'input_commstructs' is None or empty.
        """
        has_inputs = (input_commstructs is not None
                      and len(input_commstructs) > 0)
        if not has_inputs:
            raise ExecutionException(
                "CompareStatisticsProcessor.execute : No inputs")

        self.compareMotifHistogram(input_commstructs)
Exemplo n.º 9
0
    def execute(self, input_commstructs):
        """Compute the contingency table of the first input.

        The processor output directory (component output directory joined
        with the component prefix) is removed if present and created
        again. The three contingency attributes of every motif statistics
        of the input are reset to 0, and the input is then handed to
        self.computeContingencyTable together with the reference motif
        read from REFERENCE_MOTIF_PARAM.

        Returns the value returned by self.computeContingencyTable.
        Raises ExecutionException when 'input_commstructs' is None or empty.
        """
        if input_commstructs is None or len(input_commstructs) == 0:
            raise ExecutionException(
                "ContingencyProcessor.execute : No inputs")

        input_commstruct = input_commstructs[0]

        # Retrieve the processor parameters
        reference_motif = self.getParameter(
            ContingencyProcessor.REFERENCE_MOTIF_PARAM)

        # Prepare the processor output dir
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        # second argument = ignore_errors: a missing directory is not an error
        shutil.rmtree(dir_path, True)
        # 0o777 is the octal spelling accepted by Python 2.6+ and Python 3
        FileUtils.createDirectory(dir_path, 0o777)

        # Initialize motif contingency statistics
        reset_attributes = (
            MotifStatistics.CONTIGENCY_MOTIF_COOCCURENCE,
            MotifStatistics.CONTIGENCY_REFERENCE_MOTIF_BEDSEQ,
            MotifStatistics.CONTINGENCY_TOTAL_BEDSEQ,
        )
        for motif_name in input_commstruct.motifStatistics.keys():
            motif_statistics = input_commstruct.motifStatistics[motif_name]
            for attribute in reset_attributes:
                motif_statistics.setAttribute(attribute, 0)

        # Compute the contingency table
        input_commstruct = self.computeContingencyTable(
            input_commstruct, reference_motif)

        return input_commstruct
Exemplo n.º 10
0
    def initServerQueue(self):
        """Reload the server queue from the queue file, if that file exists.

        The file is located in the directory given by the QUEUE_DIR_PARAM
        config entry. Blank lines and lines starting with
        PFConstants.COMMENT_CHAR are skipped. Every other line is split on
        "|**|" into at most five fields that override, in order, the
        defaults [None, None, 0, "True", None]; a line with more than five
        fields keeps all the defaults. The second field, when present, is
        parsed as a Python literal and the five values are passed
        positionally to self.addToQueue.

        Raises ExecutionException when an IOError occurs.
        """
        queue_file_path = os.path.join(
            self.config[PFConstants.QUEUE_DIR_PARAM],
            PFConstants.SERVER_QUEUE_FILE_NAME)
        if not os.path.exists(queue_file_path):
            return

        try:
            commands_list = []
            queue_file = FileUtils.openFile(queue_file_path)
            try:
                for line in queue_file:
                    if line.isspace() or line[0] == PFConstants.COMMENT_CHAR:
                        continue
                    command_params = [None, None, 0, "True", None]
                    # drop the line terminator so that it does not end up in
                    # the last field
                    tokens = line.rstrip("\r\n").split("|**|")
                    if len(tokens) <= 5:
                        command_params[:len(tokens)] = tokens
                    commands_list.append(command_params)
            finally:
                # close the file even when reading fails
                queue_file.close()

            for command_params in commands_list:
                # Options are parsed per command, not once for the whole
                # file; a command without options field keeps None.
                options = command_params[1]
                if options is not None:
                    options = ast.literal_eval(options)
                self.addToQueue(command_params[0], options,
                                command_params[2], command_params[3],
                                command_params[4])
        except IOError as io_exce:
            raise ExecutionException(
                " PipelineManager.initServerQueue : Unable to read Server queue from file : "
                + queue_file_path + ". From:\n\t---> " + str(io_exce))
Exemplo n.º 11
0
    def execute( self, input_commstructs):
        """Build the final output of the pipeline from the first input.

        Reads the motif database path and the whitespace-separated list of
        database files (mandatory), an optional custom database file and
        an optional display limit (1.0 when not set), recreates the
        processor output directory, then runs AnalyseMotifStats,
        createLogos and outputClassification and copies the progression
        XSL and XML files into the output directory.

        Raises ExecutionException when 'input_commstructs' is None or
        empty, or when no motif database file is specified.
        """
        if input_commstructs is None or len( input_commstructs) == 0:
            raise ExecutionException( "FinalOutputProcessor.execute : No inputs")

        input_commstruct = input_commstructs[0]

        # Retrieve the processor parameters
        self.dbPath = self.getParameter( FinalOutputProcessor.MOTIF_DATABASE_PATH_PARAM)

        # Retrieve the list of motif database files to use
        database_file_line = self.getParameter( FinalOutputProcessor.MOTIF_DATABASE_FILE_LIST_PARAM)
        # strip() also rejects the empty string, which isspace() lets through
        if database_file_line is None or not database_file_line.strip():
            raise ExecutionException( "FinalOutputProcessor.getMethodParameters : No motif database file specified in parameter '" + FinalOutputProcessor.MOTIF_DATABASE_FILE_LIST_PARAM + "'")
        self.dbFiles = [os.path.join( self.dbPath, file_path) for file_path in database_file_line.split()]

        # Add the custom motif database files if any
        custom_database_file_line = self.getParameter( FinalOutputProcessor.CUSTOM_MOTIF_DATABASE_FILE_PARAM, False)
        if custom_database_file_line is not None and custom_database_file_line.strip():
            self.dbFiles.append( custom_database_file_line)

        limit_value = self.getParameter( FinalOutputProcessor.DISPLAY_LIMIT_VALUE, False)
        if limit_value is None:
            limit_value = 1.0

        # Prepare the processor output dir
        self.outPath = os.path.join( self.component.outputDir, self.component.getComponentPrefix())
        # second argument = ignore_errors: a missing directory is not an error
        shutil.rmtree( self.outPath, True)
        # 0o777 is the octal spelling accepted by Python 2.6+ and Python 3
        FileUtils.createDirectory( self.outPath, 0o777)

        # Copy motif graph and stats files
        analysis = self.AnalyseMotifStats( input_commstruct)

        # Create motif logos
        self.createLogos( input_commstruct)

        # Output Results
        self.outputClassification( input_commstruct, analysis, limit_value)

        # Copy other information
        FileUtils.copyFile( os.path.join( self.component.outputDir, Constants.PROGRESSION_XSL_FILE), self.outPath)
        FileUtils.copyFile( os.path.join( self.component.outputDir, Constants.PROGRESSION_XML_FILE), self.outPath)
Exemplo n.º 12
0
    def computeBlockRatio(self, index_start, index_end, pwm):
        """Compute the ratio of conserved positions in a window of the PWM.

        index_start, index_end -- window limits (index_end excluded)
        pwm -- matrix object providing the counts, ratios and information
            values of each position

        A position is conserved when the value of its most conserved
        letter reaches the conservation limit:
          - occurrence ratio algorithm: pwm.ratioMatrix value >=
            self.residuConservationLimit;
          - information ratio algorithm: information value rescaled
            between the two pwm.informationLimits of the letter >
            self.residuConservationLimit.
        The result is the number of conserved positions divided by the
        window length. 0.0 is returned as soon as a non-conserved position
        has no most conserved residue, or has more sequences without any
        alphabet letter than occurrences of its most conserved residue.

        Raises ExecutionException when self.algorithm is not one of the
        two known algorithms.
        """

        def gaps_outnumber(position, residue):
            # Sequences showing none of the alphabet letters at this position
            # are counted as "-"; compare them to the residue occurrences.
            occurrences = 0
            for symbol in Constants.DNA_ALPHABET:
                occurrences += pwm.matrix[symbol][position]
            return (pwm.nbSequences - occurrences) > pwm.matrix[residue][position]

        # Algorithm DirectRatio : Window ratio = ratio of position having a minimum max ratio
        if self.algorithm == BlockProcessor.ALGORITHM_OCCURENCE_RATIO_VALUE:
            conserved_count = 0
            for position in range(index_start, index_end):
                residue = pwm.getMostConservedResidu(position)
                if residue is None:
                    return 0.0
                if pwm.ratioMatrix[residue][position] >= self.residuConservationLimit:
                    conserved_count += 1
                elif gaps_outnumber(position, residue):
                    return 0.0

            return conserved_count / float(index_end - index_start)

        if self.algorithm == BlockProcessor.ALGORITHM_INFORMATION_RATIO_VALUE:
            conserved_count = 0
            for position in range(index_start, index_end):
                top_letter = pwm.informationMatrix[Constants.MAX_INDEX][position]
                limits = pwm.informationLimits[top_letter]
                information = pwm.informationMatrix[top_letter][position]
                # position of the information value between its two limits
                info_ratio = (information - limits[0]) / float(limits[1] - limits[0])
                if info_ratio > self.residuConservationLimit:
                    conserved_count += 1
                    continue
                residue = pwm.getMostConservedResidu(position)
                if residue is None:
                    return 0.0
                if gaps_outnumber(position, residue):
                    return 0.0

            return conserved_count / float(index_end - index_start)

        raise ExecutionException(
            "BlockProcessor.computeBlockRatio : No known algorithm with name : "
            + self.algorithm)
Exemplo n.º 13
0
    def getInputCommStructs(self):
        """Load the input structures this component is allowed to process.

        Nothing is loaded when self.getAuthorizedInputClasses() returns
        None. Otherwise:
          - without INPUT_FILE_PARAM parameter, the output file of every
            previous component is loaded with the result class of that
            component, which must be one of the authorized classes;
          - with an input file, each authorized class tries in turn to
            load it; failures are traced and the next class is tried.

        Raises ExecutionException when a previous component has a result
        class that is not authorized, or when no authorized class could
        load the provided input file.
        """
        authorized_input_classes = self.getAuthorizedInputClasses()

        input_commstructs = []
        if authorized_input_classes is not None:
            input_file = self.getParameter(Component.INPUT_FILE_PARAM, False)
            if input_file is None:
                #Compares the list of authorized inputs to outputs of previous components
                for component in self.previousComponents:
                    previous_result_class = component.resultClass
                    if previous_result_class in authorized_input_classes:
                        input_commstruct = previous_result_class.fromXMLFile(
                            component.getOutputFilePath())
                        if input_commstruct is not None:
                            input_commstructs.append(input_commstruct)
                    else:
                        # str(): the class object cannot be concatenated to
                        # the message directly
                        raise ExecutionException(
                            "Component.getInputCommStructs : input is not of the right class. Class is '"
                            + str(previous_result_class) +
                            "' but waited classes are " +
                            str(authorized_input_classes))
            else:
                #Try to read the input file using classes authorized as input
                for input_class in authorized_input_classes:
                    try:
                        Log.trace(
                            "Component.getInputCommStructs : Trying to load data from file : "
                            + input_file)
                        input_commstruct = input_class.fromXMLFile(input_file)
                        if input_commstruct is not None:
                            input_commstructs.append(input_commstruct)
                        Log.trace(
                            "Component.getInputCommStructs : Data correctly loaded"
                        )
                    except Exception as exce:
                        # Best effort: a class unable to read the file is
                        # traced and the next authorized class is tried.
                        Log.trace(
                            "Component.getInputCommStructs : Data not loaded using class '"
                            + str(input_class) + "' : " + str(exce))
                if len(input_commstructs) == 0:
                    raise ExecutionException(
                        "Component.getInputCommStructs : The provided input file does not contain information the processor '"
                        + self.processorName + "' can manage : " + input_file)
        # NOTE(review): the visible code never returns 'input_commstructs' -
        # confirm whether a return statement was lost.
Exemplo n.º 14
0
 def getAttributeAsfloat(self, att_name, mandatory = False):
     """Return the value of attribute 'att_name' converted to float.

     att_name -- name of the attribute, read through self.getAttribute
     mandatory -- when True a value that cannot be converted raises an
         ExecutionException; when False (default) 0 is returned instead
     """
     try:
         return float( self.getAttribute( att_name))
     except (TypeError, ValueError) as val_exce:
         if mandatory:
             raise ExecutionException( "MotifStatistics.getAttributeAsfloat : Unable to convert the value of attribute :'" + att_name + "'. From:\n\t---> " + str( val_exce))
         return 0
Exemplo n.º 15
0
    def outputAlignmentToFASTAFile(self, alignment, file_path,
                                   desired_species_list):
        """Write the FASTA form of 'alignment' to 'file_path'.

        alignment -- object whose convertToFASTA(desired_species_list)
            returns the text to write
        file_path -- path of the file to write (overwritten if it exists)
        desired_species_list -- forwarded unchanged to convertToFASTA

        Raises ExecutionException when an IOError occurs.
        """
        try:
            # Convert before opening: a failing conversion must not leave a
            # truncated output file behind.
            fasta_text = alignment.convertToFASTA(desired_species_list)
            with open(file_path, "w") as fasta_file:
                fasta_file.write(fasta_text)
        except IOError as io_exce:
            raise ExecutionException(
                "MSAProcessor.outputAlignmentToFASTAFile : Unable to save alignment to FASAT file : '"
                + file_path + "'. From \n\t" + str(io_exce))
Exemplo n.º 16
0
    def execute(self, input_commstructs):
        """Run the multiple sequence alignment method chosen in the parameters.

        The METHOD_PARAM parameter is lower-cased and selects either
        self.executeClustalW or self.executeMAFFT, which is called with
        the first input.

        Returns the first element of 'input_commstructs'.
        Raises ExecutionException when 'input_commstructs' is None or
        empty, or when the requested method is not known.
        """
        if input_commstructs is None or len(input_commstructs) == 0:
            # message names this processor, like the other message below
            raise ExecutionException("MSAProcessor.execute : No inputs")

        input_commstruct = input_commstructs[0]

        # Retrieve the Processor parameters
        method = self.getParameter(MSAProcessor.METHOD_PARAM).lower()

        #Select the method to use
        if method == MSAProcessor.METHOD_VALUE_CLUSTALW:
            self.executeClustalW(input_commstruct)
        elif method == MSAProcessor.METHOD_VALUE_MAFFT:
            self.executeMAFFT(input_commstruct)
        else:
            raise ExecutionException(
                "MSAProcessor.execute : required method is not known : " +
                method)

        return input_commstruct
Exemplo n.º 17
0
    def outputTable(table, path):
        """Write every item of 'table' to the file 'path', one per line.

        table -- iterable of values; each one is written as str(value)
        path -- path of the file, opened for writing through
            FileUtils.openFile

        Raises ExecutionException when an IOError occurs.
        """
        try:
            out_file = FileUtils.openFile(path, "w")
            try:
                for number in table:
                    out_file.write(str(number) + "\n")
            finally:
                # closing flushes the buffered lines and releases the handle
                # even when a write fails
                out_file.close()
        except IOError as io_exce:
            raise ExecutionException(
                "HistogramProcessor.outputMotifStatistics : Unable to build statistics out_file. From:\n\t---> "
                + str(io_exce))
Exemplo n.º 18
0
    def outputMotifStatistics(self, statistics, path):
        """Write every item of 'statistics' to the file 'path', one per line.

        statistics -- iterable of values; each one is written as str(value)
        path -- path of the file to write (overwritten if it exists)

        Raises ExecutionException when an IOError occurs.
        """
        try:
            # 'with' flushes and closes the file even when a write fails
            with open(path, "w") as statistics_file:
                for number in statistics:
                    statistics_file.write(str(number) + "\n")
        except IOError as io_exce:
            raise ExecutionException(
                "HistogramProcessor.outputMotifStatistics : Unable to build statistics file. From:\n\t---> "
                + str(io_exce))
Exemplo n.º 19
0
    def getParameterAsint(self, param_name, mandatory=True):
        """Return the value of parameter 'param_name' converted to int.

        param_name -- name of the parameter, read through
            self.getParameter(param_name, mandatory)
        mandatory -- when True (default) a value that cannot be converted
            raises an ExecutionException; when False None is returned
        """
        try:
            return int(self.getParameter(param_name, mandatory))
        except (TypeError, ValueError) as val_exce:
            if mandatory:
                raise ExecutionException(
                    "Processor.getParameterAsint : Unable to convert the value of parameter :'"
                    + param_name + "'. From:\n\t---> " + str(val_exce))
            return None
Exemplo n.º 20
0
    def getParameter(self, param_name, mandatory=True):
        """Return the value stored under 'param_name' in self.parameters.

        mandatory -- when True (default) an unknown parameter, or a
            'parameters' attribute that cannot be indexed, raises an
            ExecutionException; when False None is returned instead
        """
        try:
            return self.parameters[param_name]
        except (TypeError, KeyError) as key_exce:
            if mandatory:
                raise ExecutionException(
                    "Processor.getParameter : Processor does not know the parameter :'"
                    + param_name + "'. From:\n\t---> " + str(key_exce))
            return None
Exemplo n.º 21
0
    def generateRandomSites(self, motif, motif_file_path, site_number):
        """Generate random sites of a motif with the RSAT 'random-sites' script.

        motif -- motif name, used to build the '<motif>_sites.fasta'
            output file in the component output directory
        motif_file_path -- file passed to the script with '-m'
        site_number -- number of sites passed to the script with '-n'

        After the command has run, every line of the output file that is
        neither blank nor starting with ">" contributes its first token,
        upper-cased, to the list of sites.

        Raises ExecutionException when the command returns a non-zero
        status or when the output file cannot be read.
        """
        # Retrieve method required parameters
        RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        output_path = os.path.join(dir_path, motif + "_sites.fasta")

        # Build the RSAT random-sites command
        # NOTE(review): the arguments are concatenated unquoted into a shell
        # command line; a path containing whitespace or shell
        # metacharacters will break it.
        cmd = os.path.join(RSAT_PATH, "python-scripts/random-sites")
        cmd += " -m " + motif_file_path
        cmd += " -n " + str(site_number)
        cmd += " -o " + output_path

        # Execute the command
        # NOTE(review): the 'commands' module only exists in Python 2.
        cmd_result = commands.getstatusoutput(cmd)
        if cmd_result[0] != 0:
            Log.log(
                "ImplantSitesProcessor.generateSites : status returned is :" +
                str(cmd_result[0]) + " for command '" + cmd + "'")
            Log.log(
                "ImplantSitesProcessor.generateSites : command output is = \n"
                + str(cmd_result[1]))
            raise ExecutionException(
                "ImplantSitesProcessor.generateSites : Cannot execute random-sites commands. See logs for more details"
            )

        # Parse the result of the command
        sites = []
        try:
            # 'with' closes the file even when reading fails
            with open(output_path, "r") as site_file:
                for line in site_file:
                    if not line.isspace() and line[0] != ">":
                        sites.append(line.split()[0].upper())
        except IOError as io_exce:
            raise ExecutionException(
                "ImplantSitesProcessor.generateSites : Unable to read motif sites from file '"
                + output_path + "'. From:\n\t---> " + str(io_exce))
        # NOTE(review): the visible code never returns 'sites' - confirm
        # whether a return statement was lost.
Exemplo n.º 22
0
    def outputConfig(self):
        """Write the component parameters to its config file.

        The file path comes from self.getConfigFilePath(); one
        'name=value' line is written per entry of self.parameters.

        Raises ExecutionException when an IOError occurs while opening or
        writing the file.
        """
        # Resolved before the 'try' so that the path is always available to
        # the error message below.
        output_path = self.getConfigFilePath()
        try:
            config_file = FileUtils.openFile(output_path, "w")
            try:
                for param in self.parameters.keys():
                    config_file.write(param + "=" + self.parameters[param] + "\n")
            finally:
                # closing flushes the buffered lines and releases the handle
                # even when a write fails
                config_file.close()
        except IOError as io_exce:
            raise ExecutionException(
                "Component.outputConfig : Unable to write component config in file '"
                + output_path + "'. From:\n\t---> " + str(io_exce))
Exemplo n.º 23
0
    def removeFirstInQueue(self):
        """Remove the first element of the server queue and save the queue.

        The queue is modified and saved (self.outputServerQueue) while
        holding self.serverQueueLock. An empty queue is left unchanged but
        is still saved.

        Raises ExecutionException when saving the queue fails.
        """
        self.serverQueueLock.acquire()
        try:
            if len(self.serverQueue) > 0:
                self.serverQueue = self.serverQueue[1:]
            self.outputServerQueue()
        except ExecutionException as exe_exce:
            raise ExecutionException(
                " PipelineManager.removeFirstInQueue : Unable to remove first element in server queue. From:\n\t---> "
                + str(exe_exce))
        finally:
            # Always give the lock back: without it the next acquire() on a
            # non-reentrant lock would block forever.
            self.serverQueueLock.release()
Exemplo n.º 24
0
    def getParameter(self, param_name, mandatory=True):
        """Return the value stored under 'param_name' in self.config.

        mandatory -- when True (default) an unknown parameter, or a
            'config' attribute that cannot be indexed, raises an
            ExecutionException; when False None is returned instead
        """
        try:
            return self.config[param_name]
        except (TypeError, KeyError) as key_exce:
            if mandatory:
                raise ExecutionException(
                    "PipelineManager.getParameter : Config parameter :'" +
                    param_name + "' does not exists. From:\n\t---> " +
                    str(key_exce))
            return None
Exemplo n.º 25
0
    def prepareOutputDir(self):
        """Recreate the processor output directory.

        The directory is the component output directory joined with the
        component prefix. It is removed if present, then created again.

        Returns the tuple (dir_path, file_path) where file_path is the
        path of the file named "motif" inside the directory.
        Raises ExecutionException when the directory cannot be created.
        """
        # Computed before the 'try' so that the path is always available to
        # the error message below.
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        try:
            # second argument = ignore_errors: a missing directory is not an
            # error
            shutil.rmtree(dir_path, True)
            # 0o777 is the octal spelling accepted by Python 2.6+ and Python 3
            FileUtils.createDirectory(dir_path, 0o777)
        except (IOError, OSError) as io_exce:
            # On Python 2 the os-level directory functions raise OSError,
            # which is not an IOError there; on Python 3 both names denote
            # the same class.
            raise ExecutionException(
                "MSAProcessor.prepareOutputDir : Unable to create output directory for FASTA file export : '"
                + dir_path + "'. From \n\t" + str(io_exce))

        file_name = "motif"
        return (dir_path, os.path.join(dir_path, file_name))
Exemplo n.º 26
0
    def mergeMotifs(self, master_motif, added_motif):
        """Extend 'master_motif' with the contiguous motif 'added_motif'.

        When the added motif ends where the master starts, the master
        start index is moved to the start of the added motif; when it
        starts where the master ends, the master end index is moved to its
        end. The master motif then recomposes its name from self.name and
        merges the PWM of the added motif, being told whether that PWM
        comes after (True) or before (False) its own.

        Raises ExecutionException when the two motifs are not contiguous.
        """
        added_is_before = master_motif.indexStart == added_motif.indexEnd
        # only evaluated when the added motif does not come before
        added_is_after = (not added_is_before
                          and master_motif.indexEnd == added_motif.indexStart)

        if not (added_is_before or added_is_after):
            raise ExecutionException(
                "SequenceAlignement.mergeMotifs : The two given motifs are not contiguous"
            )

        if added_is_before:
            master_motif.indexStart = added_motif.indexStart
        else:
            master_motif.indexEnd = added_motif.indexEnd

        master_motif.composeName(self.name)
        master_motif.pwm.mergeMatrix(added_motif.pwm, added_is_after)
Exemplo n.º 27
0
    def outputMotifDefinition(self, motif, dir_path):
        """Write the definition of 'motif' to '<dir_path>/<motif>.tab'.

        The text written is the value returned by
        motif.pwm.convertToHorizontaltab().

        Raises ExecutionException when the file cannot be written.
        """
        # NOTE(review): 'motif' is concatenated with a string here and used
        # as an object with a 'pwm' attribute below - confirm that both are
        # intended for the same value.
        file_path = os.path.join(dir_path, motif + ".tab")

        definition = motif.pwm.convertToHorizontaltab()

        try:
            # 'with' flushes and closes the file even when the write fails
            with open(file_path, "w") as motif_file:
                motif_file.write(definition)
        except IOError as io_exce:
            raise ExecutionException(
                "ImplantSitesProcessor.outputMotifDefinition : Unable to write motif definition to file '"
                + file_path + "'. From:\n\t---> " + str(io_exce))
Exemplo n.º 28
0
    def addToQueue(self, pipelines_filepath, options, verbosity, resume,
                   working_dir):
        """Append a command to the server queue and save the queue.

        The tuple (pipelines_filepath, options, verbosity, resume,
        working_dir) is appended only when 'pipelines_filepath' is neither
        None nor empty; the queue is saved (self.outputServerQueue) in
        every case. Both steps run while holding self.serverQueueLock.

        Raises ExecutionException when saving the queue fails.
        """
        self.serverQueueLock.acquire()
        try:
            if pipelines_filepath is not None and len(pipelines_filepath) > 0:
                self.serverQueue.append(
                    (pipelines_filepath, options, verbosity, resume,
                     working_dir))
            self.outputServerQueue()
        except ExecutionException as exe_exce:
            raise ExecutionException(
                " PipelineManager.addToQueue : Unable to add element in server queue. From:\n\t---> "
                + str(exe_exce))
        finally:
            # Always give the lock back: without it the next acquire() on a
            # non-reentrant lock would block forever.
            self.serverQueueLock.release()
Exemplo n.º 29
0
    def getMotifsNumberFromTransfac( database_file_path = None):
        """Count the motif definitions of a TRANSFAC file.

        A definition is counted for every line beginning with "AC".

        database_file_path -- path of the database file; defaults to
            RSATUtils.RSAT_JASPAR_MOTIF_DATABASE when None

        Raises ExecutionException when the file cannot be read.
        """
        motif_count = 0

        if database_file_path is None:
            database_file_path = RSATUtils.RSAT_JASPAR_MOTIF_DATABASE

        try:
            # 'with' guarantees the database file is closed
            with open(database_file_path, "r") as database_file:
                for line in database_file:
                    # detect the transfac definition starting line
                    if line[0:2] == "AC":
                        motif_count += 1
        except IOError as io_exce:
            raise ExecutionException( "MotifUtils.getMotifsNumberFromTransfac : Unable to read motif definition from database file '" + database_file_path + "'. From:\n\t---> " + str( io_exce))
        # NOTE(review): the visible code never returns 'motif_count' -
        # confirm whether a return statement was lost.
Exemplo n.º 30
0
    def openFile(path, mode="r", chmod=0o666):
        """Open 'path' with the given mode and return the file object.

        path -- path of the file to open
        mode -- mode passed to open() ("r" by default)
        chmod -- permissions applied with os.chmod when the call creates
            the file, i.e. when 'path' did not exist beforehand
            (0o666 by default)

        Raises ExecutionException when 'path' exists but is not a file.
        Errors raised by open() are not intercepted.
        """
        already_exists = os.path.exists(path)
        if already_exists and not os.path.isfile(path):
            raise ExecutionException(
                "FileUtils.openFile: unable to open file. Provided path is not a file : "
                + path)

        result_file = open(path, mode)
        if not already_exists:
            try:
                os.chmod(path, chmod)
            except OSError:
                # do not leak the handle when the permissions cannot be set
                result_file.close()
                raise
        return result_file