def parseClassfreqResults(self, file_path):
    """Read class frequencies from a whitespace-separated histogram file.

    Every line must split into exactly 9 whitespace-separated columns. The
    integer found in column 1 (0-based) is used as key and the integer found
    in column 3 as value of the ``result`` dictionary.

    Raises:
        ParsingException: when a line does not have 9 columns, or when one
            of the two columns cannot be converted to int.
    """
    result = {}
    total_number_colums = 9
    class_col = 1
    frequency_col = 3
    # The context manager closes the file even when a ParsingException aborts
    # the loop (the handle used to stay open in that case).
    with open(file_path, "r") as histogram_file:
        for line in histogram_file:
            tokens = line.split()
            if len(tokens) != total_number_colums:
                raise ParsingException(
                    "HistogramProcessor.parseClassfreqResults : The histogram file is not correct formatted. Number of column is abnormal : '"
                    + file_path)
            try:
                result[int(tokens[class_col])] = int(tokens[frequency_col])
            except (TypeError, ValueError) as exce:
                raise ParsingException(
                    "HistogramProcessor.parseClassfreqResults : Unable to get int value from histogram file : '"
                    + file_path + "'. From:\n\t---> " + str(exce))
    # NOTE(review): the excerpt under review stops once the file is closed;
    # neither a handler for the original `try:` nor a return statement is
    # visible, so none is added here -- confirm against the complete file.
def getAlignmentSequences(sub_node, bedseq, seqalign):
    """Add every sequence child of `sub_node` to the alignment `seqalign`.

    Each child must carry the sequence tag and provide both the species and
    the text attributes. The text is passed to `seqalign.addSequence` as a
    list of its characters.

    Raises:
        ParsingException: if a child has another tag, or if one of the two
            attributes is missing.
    """
    for node_sequence in sub_node:
        if node_sequence.tag.lower() != BedSeqAlignmentStatsCommStruct.SEQUENCE_TAG:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : The sequences of the alignment of '"
                + bedseq.toString() + "' contains an unauthorized element : '"
                + node_sequence.tag.lower() + "'")
        species = CommStruct.getAttribute(
            node_sequence,
            BedSeqAlignmentStatsCommStruct.SEQUENCE_SPECIES_ATT)
        text = CommStruct.getAttribute(
            node_sequence,
            BedSeqAlignmentStatsCommStruct.SEQUENCE_TEXT_ATT)
        # Test for None *before* converting to a list: list(None) raises a
        # bare TypeError, which used to hide the ParsingException below.
        if species is None or text is None:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : A sequence of the alignment of '"
                + bedseq.toString() + "' is missing required attributes")
        seqalign.addSequence(species, list(text))
def getAlignmentMotifs(sub_node, bedseq, seqalign):
    """Build a Motif from every motif child of `sub_node` and add it to `seqalign`.

    The PWM attribute, when present and non-empty, is expected to look like
    ``<key>:<int> <int> ...;<key>:<int> ...``: rows are separated by ';', the
    row key is separated from its whitespace-separated integer values by ':'.
    Rows without ':' are ignored.

    Raises:
        ParsingException: if a child has another tag, if a PWM value is not
            an integer, or if start, end or name is missing.
    """
    for node_motif in sub_node:
        if node_motif.tag.lower() != BedSeqAlignmentStatsCommStruct.MOTIF_TAG:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '"
                + bedseq.toString() + "' contains an unauthorized element : '"
                + node_motif.tag.lower() + "'")

        start = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_START_ATT)
        end = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_END_ATT)
        name = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NAME_ATT)
        motif_id = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_ID_ATT, False)
        consensus = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_CONSENSUS_ATT,
            False)
        nb_species = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NBSPECIES_ATT,
            False)
        strand = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_STRAND_ATT, False)
        offset = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_OFFSET_ATT)
        score = CommStruct.getAttributeAsfloat(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_SCORE_ATT)

        # Retrieve the PWM of the motif
        pwm_s1 = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT)
        if pwm_s1 is not None and len(pwm_s1) > 0:
            pwm_matrix = {}
            # Initialised up front: it used to be unbound (NameError) when
            # no row of the PWM string contained a ':'.
            length = 0
            for line in pwm_s1.split(";"):
                pwm_s3 = line.split(":")
                if len(pwm_s3) > 1:
                    row_values = pwm_s3[1].split()
                    try:
                        for value in row_values:
                            pwm_matrix.setdefault(pwm_s3[0], []).append(
                                int(value))
                    except ValueError as val_exce:
                        raise ParsingException(
                            "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : Unable to get integer value for Motif '"
                            + BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT
                            + "' attributes. From:\n\t---> " + str(val_exce))
                    # As before, the length of the last parsed row wins.
                    length = len(row_values)
            pwm = PWM()
            pwm.matrix = pwm_matrix
            pwm.totalLength = length
            pwm.nbSequences = nb_species
        else:
            pwm = None

        if start is None or end is None or name is None:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '"
                + bedseq.toString() + "' is missing required attributes")

        motif = Motif(start, end, name, pwm)
        motif.offset = offset
        motif.score = score
        if consensus is not None:
            motif.consensus = consensus
        if motif_id is not None:
            motif.id = motif_id
        if strand is not None:
            motif.strand = strand
        seqalign.addMotif(motif)
def executePipelines(self):
    """Prepare and read the pipelines described by the head of `self.serverQueue`.

    A queue entry is indexed as: 0 = pipelines XML file path, 1 = pipeline
    options, 2 = verbosity (falls back to 1 when it is not an integer
    string), 3 = resume flag ("true", case-insensitive), 4 = working
    directory (overrides the configured base output directory when
    non-empty).

    Raises:
        ParsingException: if the pipelines definition cannot be read.
    """
    result = True
    while len(self.serverQueue) > 0:
        params = self.serverQueue[0]
        pipelines_filepath = params[0]
        pipeline_options = params[1]
        try:
            verbosity = int(params[2])
        except ValueError:
            verbosity = 1
        resume = (params[3].lower() == "true")
        working_dir = params[4]

        # Modifies the config if required and initialize logs and output directory
        if working_dir is not None and len(working_dir) > 0:
            self.config[PFConstants.BASE_OUTPUT_DIR_PARAM] = working_dir

        # Verify the base output dir and the output dir are created and create them if not
        # (0o777 is the octal spelling accepted by Python 2.6+ and Python 3)
        FileUtils.createDirectory(
            self.config[PFConstants.BASE_OUTPUT_DIR_PARAM], 0o777)
        self.config[PFConstants.OUTPUT_DIR_PARAM] = os.path.join(
            self.getParameter(PFConstants.BASE_OUTPUT_DIR_PARAM),
            PFConstants.OUTPUT_DIR_NAME)
        FileUtils.createDirectory(
            self.config[PFConstants.OUTPUT_DIR_PARAM], 0o777)

        # Switch log location
        Log.switchFiles(
            self.getParameter(PFConstants.OUTPUT_DIR_PARAM), verbosity)

        # Parse the XML file to retrieve the pipelines definition
        Log.trace(
            "#################################################################################"
        )
        Log.trace(
            "# PipelineManager.executePipelines : Reading pipelines from : "
            + pipelines_filepath)
        Log.trace(
            "#################################################################################"
        )
        try:
            pipelines = PipelineXMLParser.getPipelines(pipelines_filepath)
            OptionManager.applyOptions(pipelines, pipeline_options)
            PipelineXMLParser.toXMLFile(
                self.config[PFConstants.OUTPUT_DIR_PARAM], pipelines)
        except (SyntaxError, ParsingException) as exce:
            # Both error kinds were reported with the very same message.
            raise ParsingException(
                "PipelineManager.executePipelines : Unable to read definition of pipelines from XML file: '"
                + pipelines_filepath + "'. From:\n\t---> " + str(exce))
        # NOTE(review): the excerpt under review stops here; the code that
        # runs the pipelines, removes the entry from the queue and returns
        # `result` is not visible -- confirm against the complete file.
def getParam(node_param, component):
    """Read the name/value pair of `node_param` and add it to `component`.

    Raises:
        ParsingException: if the name or the value attribute is missing or
            empty.
    """
    param_name = PipelineXMLParser.getAttribute(
        node_param, PipelineXMLParser.PARAM_NAME_ATT)
    param_value = PipelineXMLParser.getAttribute(
        node_param, PipelineXMLParser.PARAM_VALUE_ATT)

    # The name is validated first, then the value.
    if param_name is None or len(param_name) == 0:
        raise ParsingException(
            "PipelineXMLParser.getParam : Malformed parameter - unable to retrieve parameter name in component '"
            + component.processorName + "'")
    if param_value is None or len(param_value) == 0:
        raise ParsingException(
            "PipelineXMLParser.getParam : Malformed parameter - unable to retrieve parameter value in component '"
            + component.processorName + "'")

    component.addParameters(param_name, param_value)
def execute(self, input_comm_structs):
    """Load the BED sequences and, optionally, annotate them with peak maxima.

    The BED file is parsed into a dictionary of BED sequences, which is
    reduced with `self.extractPeaks` when a peak number is configured. When
    a peak information file is configured, each of its lines with more than
    13 columns provides a chromosome (column 0), a peak maximum (column 10)
    and an identifier (column 13); the maximum is stored as `referenceIndex`
    of the BED sequences of that chromosome carrying that identifier.

    Raises:
        ParsingException: if the peak information file cannot be opened or
            parsed.
    """
    # Retrieve the processor parameters
    bed_filepath = self.getParameter(BEDProcessor.INPUT_BED_FILE_PARAM)
    species = self.getParameter(BEDProcessor.REFERENCE_SPECIES_PARAM)
    peak_filepath = self.getParameter(BEDProcessor.INPUT_PEAK_FILE, False)
    peak_number = self.getParameterAsint(BEDProcessor.PEAK_NUMBER, False)
    extension_5p = self.getParameterAsint(BEDProcessor.EXTENSION_5P, False)
    if extension_5p is None:
        extension_5p = 0
    extension_3p = self.getParameterAsint(BEDProcessor.EXTENSION_3P, False)
    if extension_3p is None:
        extension_3p = 0

    # Parse the BED file and get the BED sequences ordered by species and chromosom
    bedseq_dictionnary = BEDParser.getBEDSequenceDictionnary(
        species, bed_filepath, extension_5p, extension_3p)

    # Extract the desired number of peak if a limit has been defined
    if peak_number is not None:
        bedseq_dictionnary = self.extractPeaks(bedseq_dictionnary,
                                               peak_number)

    # Parse the peak info file if exists
    if peak_filepath is not None and len(peak_filepath) > 0:
        chrom_col = 0
        max_peak_col = 10
        id_col = 13
        try:
            # The context manager closes the file on every exit path.
            with open(peak_filepath) as input_file:
                for line in input_file:
                    tokens = line.split()
                    if len(tokens) <= id_col:
                        continue
                    chrom = tokens[chrom_col]
                    max_peak = self.getTokenAsint(tokens[max_peak_col])
                    peak_id = tokens[id_col]
                    if chrom is None or max_peak is None or peak_id is None:
                        continue
                    # A chromosome without any BED sequence has nothing to
                    # annotate; .get() avoids an uncaught KeyError there.
                    for bed_seq in bedseq_dictionnary.get(
                            species + "." + chrom, ()):
                        if bed_seq.id == peak_id:
                            bed_seq.referenceIndex = max_peak
        except ParsingException as par_exce:
            raise ParsingException(
                "BEDProcessor.execute : An error occured while parsing peak information file : '"
                + peak_filepath + "'. From:\n\t---> " + str(par_exce))
        except IOError as io_exce:
            raise ParsingException(
                "BEDProcessor.execute : Unable to open peak information file : '"
                + peak_filepath + "'. From:\n\t---> " + str(io_exce))
    # NOTE(review): the excerpt under review stops here; what is done with
    # `bedseq_dictionnary` afterwards is not visible -- confirm against the
    # complete file.
def parseFile(self, file_name, is_chrom_file):
    """Check that `file_name` is a MAF file and parse its blocks.

    The file is read line by line until a line whose first token is
    '##maf' is found. When it is, the blocks are parsed with
    `parseBlockListWithIndex` if the file `file_name + "index"` can be
    opened, and with `parseBlockListWithoutIndex` otherwise.

    Raises:
        ParsingException: if no '##maf' token is found, or if an IOError
            occurs while opening or reading the file.
    """
    try:
        input_file = open(file_name, 'r')
        try:
            # Verify if the token '##maf' indicating a MAF file is found in the first lines.
            # readline() is used on purpose: the parsers below keep reading
            # from the same handle, which must not be mixed with iteration.
            # NOTE(review): the collapsed source does not show whether the
            # scan stops at the first non-blank line or only at '##maf';
            # the latter is implemented here -- confirm.
            is_maf_file = False
            while True:
                line = input_file.readline()
                if len(line) == 0:
                    break
                if line.isspace():
                    continue
                tokens = line.split()
                if len(tokens) > 0 and tokens[0] == "##maf":
                    is_maf_file = True
                    break

            if not is_maf_file:
                raise ParsingException("MAFProcessor.parseFile : The file '" +
                                       file_name + "' is not a MAF file")

            # if it is a maf file, verify if an index file exists
            index_path = file_name + "index"
            try:
                input_index_file = open(index_path, "r")
            except IOError:
                input_index_file = None

            if input_index_file is not None:
                try:
                    Log.trace("MAFProcessor.parseFile : parsing file '" +
                              file_name + "' using index '" + index_path +
                              "'")
                    self.parseBlockListWithIndex(input_index_file, input_file)
                finally:
                    self.closeFile(input_index_file)
            else:
                Log.trace("MAFProcessor.parseFile : parsing file '" +
                          file_name + "'")
                self.parseBlockListWithoutIndex(input_file, is_chrom_file)
            return
        finally:
            # Runs on success and on error, so the handle no longer leaks
            # when one of the parsers raises.
            self.closeFile(input_file)
    except IOError as io_exec:
        raise ParsingException(
            "MAFProcessor.parseFile : Unable to open file '" + file_name +
            "'. From:\n\t---> " + str(io_exec))
def getComponent(node_component, prefix):
    """Create the Component described by `node_component`.

    The processor name is read from the node, the class-level RANK counter
    is incremented and used (as a string) as second Component argument, and
    every child carrying the parameter tag is handed to `getParam`.

    Returns:
        The created Component.

    Raises:
        ParsingException: if the processor name is missing or empty, or if
            the component is None after creation.
    """
    processor_name = PipelineXMLParser.getAttribute(
        node_component, PipelineXMLParser.COMPONENT_PROCESSOR_ATT)
    if processor_name is None or len(processor_name) == 0:
        raise ParsingException(
            "PipelineXMLParser.getComponent : Malformed component - unable to retrieve processor name")

    PipelineXMLParser.RANK += 1
    rank_label = str(PipelineXMLParser.RANK)
    component = Component(processor_name, rank_label, prefix)
    if component != None:
        for child in node_component:
            if child.tag.lower() == PipelineXMLParser.PARAM_TAG:
                PipelineXMLParser.getParam(child, component)
        return component
    raise ParsingException(
        "PipelineXMLParser.getComponent : Unable to create Component '"
        + processor_name)
def getBEDSequence(node_bedseq, comm_struct):
    """Create a BEDSequence from `node_bedseq` and register it in `comm_struct`.

    Species, chromosome, start and end are mandatory; score, peak maximum
    and identifier are only copied onto the sequence when present.

    Returns:
        The created BEDSequence.

    Raises:
        ParsingException: if one of the mandatory values is None.
    """
    species = CommStruct.getAttribute(node_bedseq,
                                      BedSeqCommStruct.BEDSEQ_SPECIES_ATT)
    chrom = CommStruct.getAttribute(node_bedseq,
                                    BedSeqCommStruct.BEDSEQ_CHROM_ATT)
    start = CommStruct.getAttributeAsint(node_bedseq,
                                         BedSeqCommStruct.BEDSEQ_START_ATT)
    end = CommStruct.getAttributeAsint(node_bedseq,
                                       BedSeqCommStruct.BEDSEQ_END_ATT)
    score = CommStruct.getAttributeAsint(
        node_bedseq, BedSeqCommStruct.BEDSEQ_SCORE_ATT, False)
    peak_max = CommStruct.getAttributeAsint(
        node_bedseq, BedSeqCommStruct.BEDSEQ_MAX_PEAK_ATT, False)
    sequence_id = CommStruct.getAttribute(
        node_bedseq, BedSeqCommStruct.BEDSEQ_ID_ATT, False)

    if species is None or chrom is None or start is None or end is None:
        raise ParsingException(
            "BedSeqCommStruct.getBEDSequence : Malformed BED Sequence - unable to retrieve sequence information"
        )

    bed_sequence = BEDSequence(species, chrom, start, end)
    if score is not None:
        bed_sequence.score = score
    if peak_max is not None:
        bed_sequence.referenceIndex = peak_max
    if sequence_id is not None:
        bed_sequence.id = sequence_id
    comm_struct.addBEDSequence(bed_sequence)
    return bed_sequence
def getStatistics(statistics_node, comm_struct):
    """Fill the statistics of `comm_struct` from the children of `statistics_node`.

    A motif-statistics child with a non-empty name becomes a MotifStatistics
    stored in `comm_struct.motifStatistics` under that name; such a child
    without a name is ignored. A parameter child is stored as a name/value
    pair in `comm_struct.paramStatistics`.

    Raises:
        ParsingException: if a child carries any other tag.
    """
    for son_node in statistics_node:
        tag = son_node.tag.lower()

        if tag == BedSeqAlignmentStatsCommStruct.MOTIF_STATS_TAG:
            name = CommStruct.getAttribute(
                son_node, BedSeqAlignmentStatsCommStruct.MOTIF_STATS_NAME_ATT)
            if name is None or len(name) == 0:
                continue
            motif_id = CommStruct.getAttribute(
                son_node, BedSeqAlignmentStatsCommStruct.MOTIF_STATS_ID_ATT)
            family = CommStruct.getAttribute(
                son_node,
                BedSeqAlignmentStatsCommStruct.MOTIF_STATS_FAMILY_ATT)
            classe = CommStruct.getAttribute(
                son_node, BedSeqAlignmentStatsCommStruct.MOTIF_STATS_CLASS_ATT)
            motif_type = CommStruct.getAttribute(
                son_node, BedSeqAlignmentStatsCommStruct.MOTIF_STATS_TYPE_ATT)
            size = CommStruct.getAttributeAsint(
                son_node, BedSeqAlignmentStatsCommStruct.MOTIF_STATS_SIZE_ATT)

            motif_stats = MotifStatistics(name)
            motif_stats.motifID = motif_id
            motif_stats.motifFamily = family
            motif_stats.motifClass = classe
            motif_stats.motifType = motif_type
            motif_stats.motifSize = size

            # Parameter children of the motif become attributes of its statistics.
            for param_node in son_node:
                if param_node.tag.lower() == BedSeqAlignmentStatsCommStruct.PARAM_TAG:
                    key = CommStruct.getAttribute(
                        param_node,
                        BedSeqAlignmentStatsCommStruct.PARAM_NAME_ATT)
                    value = CommStruct.getAttribute(
                        param_node,
                        BedSeqAlignmentStatsCommStruct.PARAM_VALUE_ATT)
                    motif_stats.setAttribute(key, value)
            comm_struct.motifStatistics[name] = motif_stats

        elif tag == BedSeqAlignmentStatsCommStruct.PARAM_TAG:
            key = CommStruct.getAttribute(
                son_node, BedSeqAlignmentStatsCommStruct.PARAM_NAME_ATT)
            value = CommStruct.getAttribute(
                son_node, BedSeqAlignmentStatsCommStruct.PARAM_VALUE_ATT)
            comm_struct.paramStatistics[key] = value

        else:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getStatistics : The statistics contains an unauthorized element : '"
                + tag + "'")
def getIntValue(self, token):
    """Return `token` converted with int().

    Raises:
        ParsingException: if `token` cannot be converted.
    """
    try:
        return int(token)
    except (TypeError, ValueError) as val_exce:
        # str(token) keeps the message buildable when the token is not a
        # string (e.g. None), instead of failing with a TypeError here.
        raise ParsingException(
            "PipelineListener.getIntValue : Unable to get integer value of '"
            + str(token) + "'. From:\n\t---> " + str(val_exce))
def getIntValue(self, token):
    """Return `token` converted with int().

    Raises:
        ParsingException: if `token` cannot be converted.
    """
    try:
        return int(token)
    except (TypeError, ValueError) as val_exce:
        # str(token) keeps the message buildable when the token is not a
        # string (e.g. None), instead of failing with a TypeError here.
        raise ParsingException(
            "MAFProcessor : Unable to get integer value of '" + str(token)
            + "'. From:\n\t---> " + str(val_exce))
class BEDParser:
    """Reader of BED files producing BEDSequence objects."""

    # 0-based indexes of the columns read from a BED line
    _chrom_col = 0
    _startindex_col = 1
    _endindex_col = 2
    _id_col = 3
    _strand_col = 5

    # --------------------------------------------------------------------------------------
    # Parse the given BED file and return a dictionnary of the BED Sequences
    # grouped by sequence keys ('species'.'chromosom')
    @staticmethod
    def getBEDSequenceDictionnary(species, bed_filepath, extension_5p,
                                  extension_3p):
        """Parse `bed_filepath` and group its sequences by sequence key.

        Lines with fewer than three columns are ignored. Lines whose first
        column starts with '#' and lines whose start is not lower than
        their end are logged and skipped. Chromosome names are lower-cased
        and prefixed with "chr" when shorter than four characters. The
        start is decreased by `extension_5p` (never below 0) and the end
        increased by `extension_3p`.

        Returns:
            A dictionary mapping `BEDSequence.getKey()` to the list of
            BEDSequence objects having that key.

        Raises:
            ParsingException: if a coordinate is not an integer or the file
                cannot be read.
        """
        sequence_dic = {}
        try:
            # The context manager closes the file on every exit path.
            with open(bed_filepath) as input_file:
                for line in input_file:
                    tokens = line.split()
                    if len(tokens) <= BEDParser._endindex_col:
                        continue
                    chrom = tokens[BEDParser._chrom_col].lower()
                    if chrom[0:1] == "#":
                        Log.log("No 'chr' in line :" + line)
                        continue
                    if len(chrom) < 4:
                        chrom = "chr" + chrom
                    start = BEDParser.getTokenAsint(
                        tokens[BEDParser._startindex_col])
                    end = BEDParser.getTokenAsint(
                        tokens[BEDParser._endindex_col])
                    if not start < end:
                        Log.log(
                            "BEDParser.getBEDSequenceDictionnary : A sequence has inversed start and end coordinates : "
                            + line)
                        continue
                    start = max(start - extension_5p, 0)
                    end = end + extension_3p
                    bedsequence = BEDSequence(species, chrom, start, end)
                    if len(tokens) > BEDParser._id_col:
                        bedsequence.id = tokens[BEDParser._id_col]
                    sequence_dic.setdefault(bedsequence.getKey(),
                                            []).append(bedsequence)
        except ParsingException as par_exce:
            raise ParsingException(
                "BEDParser.getBEDSequenceDictionnary : Some attributes are not numbers. From:\n\t--> "
                + str(par_exce))
        except IOError as io_exce:
            raise ParsingException(
                "BEDParser.getBEDSequenceDictionnary : Unable to open the file '"
                + bed_filepath + "'. From:\n\t--> " + str(io_exce))
        # The header comment promises the dictionary; it was never returned.
        return sequence_dic
def getAttributeAsint(node, att_name, required=True):
    """Return attribute `att_name` of `node` as an int.

    The value is read with `CommStruct.getAttribute` and converted through
    float() first, so a value such as "3.7" yields 3.

    Returns:
        The int value, or None when the conversion fails and `required` is
        false.

    Raises:
        ParsingException: when the conversion fails and `required` is true.
    """
    try:
        raw_value = CommStruct.getAttribute(node, att_name, required)
        return int(float(raw_value))
    except (TypeError, ValueError) as val_exce:
        if not required:
            return None
        raise ParsingException(
            "CommStruct.getAttributeAsint : Unable to convert the value of attribute :'"
            + att_name + "'. From:\n\t---> " + str(val_exce))
def getAttribute(node, att_name, required=True):
    """Return the value of attribute `att_name` obtained with `node.get`.

    When the lookup raises, a ParsingException is raised if `required` is
    true and None is returned otherwise.

    NOTE(review): if `node` is an ElementTree element, `get` presumably
    returns None for an absent attribute instead of raising, in which case
    `required` does not reject missing attributes -- callers in this file
    test the result against None themselves. Confirm before relying on it.
    """
    try:
        return node.get(att_name)
    except Exception as exce:
        if not required:
            return None
        raise ParsingException(
            "CommStruct.getAttribute : Node '" + node.tag
            + "' does not know the attribute :'" + att_name
            + "'. From:\n\t---> " + str(exce))
def getPipeline(node_pipeline):
    """Build the Pipeline described by `node_pipeline`.

    The pipeline gets the name read from the node, then `analyseNode` is
    called with the node, an empty list of previous components, the
    pipeline and an empty prefix.

    Returns:
        The created Pipeline.

    Raises:
        ParsingException: if the pipeline name is missing or empty.
    """
    pipeline_name = PipelineXMLParser.getAttribute(
        node_pipeline, PipelineXMLParser.PIPELINE_NAME_ATT)
    if pipeline_name is None or len(pipeline_name) == 0:
        raise ParsingException(
            "PipelineXMLParser.getPipeline : Malformed pipeline - unable to retrieve pipeline name")

    pipeline = Pipeline()
    pipeline.name = pipeline_name
    PipelineXMLParser.analyseNode(node_pipeline, [], pipeline, "")
    return pipeline
def getMotifStatistics(node_motif, motif):
    """Collect the statistics stored in the parameter children of `node_motif`.

    Children with another tag are ignored. Known parameter names fill the
    chi2, histogram graph path, histogram and null histogram fields of a
    new MotifStatistics; unknown names are only logged.

    Returns:
        The filled MotifStatistics.

    Raises:
        ParsingException: if a parameter has no name or no value.
    """
    statistics = MotifStatistics()
    for node_param in node_motif:
        if node_param.tag.lower() != MotifStatisticsCommStruct.PARAM_TAG:
            continue

        key = MotifStatisticsCommStruct.getAttribute(
            node_param, MotifStatisticsCommStruct.PARAM_NAME_ATT, False)
        value = MotifStatisticsCommStruct.getAttribute(
            node_param, MotifStatisticsCommStruct.PARAM_VALUE_ATT, False)

        # The name is validated before the value.
        if key is None or len(key) == 0:
            raise ParsingException(
                "MotifStatisticsCommStruct.getMotifAttributes : Malformed parameter - unable to retrieve parameter name in motif '"
                + motif.name + "'")
        if value is None or len(value) == 0:
            raise ParsingException(
                "MotifStatisticsCommStruct.getMotifAttributes : Malformed parameter - unable to retrieve parameter value in motif '"
                + motif.name + "'")

        if key == MotifStatisticsCommStruct.CHI2_PARAM_NAME:
            statistics.chi2 = MotifStatisticsCommStruct.getTokenAsfloat(
                value, False)
        elif key == MotifStatisticsCommStruct.HISTOGRAM_GRAPH_PATH_PARAM_NAME:
            statistics.histogramGraphPath = value
        elif key == MotifStatisticsCommStruct.HISTOGRAM_PARAM_NAME:
            statistics.histogram = value.split(
                MotifStatisticsCommStruct.HISTOGRAM_ENTRY_SEPARATOR_CHAR)
        elif key == MotifStatisticsCommStruct.NULL_HISTOGRAM_PARAM_NAME:
            statistics.nullHistogram = value.split(
                MotifStatisticsCommStruct.HISTOGRAM_ENTRY_SEPARATOR_CHAR)
        else:
            Log.log(
                "MotifStatisticsCommStruct.getMotifAttributes : Unknown attribute name : "
                + key)
    return statistics
def getTokenAsfloat(self, token, required=True):
    """Return `token` converted with float().

    Returns:
        The float value, or None when the conversion fails and `required`
        is false.

    Raises:
        ParsingException: when the conversion fails and `required` is true.
    """
    try:
        return float(token)
    except (TypeError, ValueError) as val_exce:
        if not required:
            return None
        # str(token): a TypeError means the token is not a string (e.g.
        # None), so plain concatenation would itself raise a TypeError.
        raise ParsingException(
            "Processor.getTokenAsfloat : Unable to convert the token to float :'"
            + str(token) + "'. From:\n\t---> " + str(val_exce))
def getCommStructFromXML(commstruct_filepath):
    """Open `commstruct_filepath`, parse it and fetch the root element.

    Raises:
        ParsingException: if an IOError occurs while opening, reading or
            closing the file.
    """
    commstruct_file = None
    root_element = None
    try:
        commstruct_file = open(commstruct_filepath, "r")
        try:
            tree = parse(commstruct_file)
            root_element = tree.getroot()
        finally:
            # Close the file even when parsing fails.
            commstruct_file.close()
    except IOError as io_exce:
        # The message parts are joined with '+': a ',' used to turn the
        # second half into a separate constructor argument.
        raise ParsingException(
            "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : Unable to open/close XML commstruct_file '"
            + commstruct_filepath + "' : " + str(io_exce))
    # NOTE(review): the excerpt under review stops here; the code using
    # `root_element` is not visible -- confirm against the complete file.
def getTokenAsint(token, required=True):
    """Return `token` converted with int().

    Returns:
        The int value, or None when the conversion fails and `required` is
        false.

    Raises:
        ParsingException: when the conversion fails and `required` is true.
    """
    try:
        return int(token)
    except (TypeError, ValueError) as val_exce:
        if not required:
            return None
        # str(token): a TypeError means the token is not a string (e.g.
        # None), so plain concatenation would itself raise a TypeError.
        raise ParsingException(
            "BEDParser.getTokenAsint : Unable to convert the token to int :'"
            + str(token) + "'. From:\n\t---> " + str(val_exce))
def getBEDSequence(node_bedseq, comm_struct):
    """Read the BED sequence attributes of `node_bedseq`.

    Species, chromosome, start and end are read as required attributes;
    score, peak maximum and identifier as optional ones.

    Raises:
        ParsingException: if one of the attribute readers raises a
            ParsingException.
    """
    try:
        species = CommStruct.getAttribute(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SPECIES_ATT)
        chrom = CommStruct.getAttribute(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_CHROM_ATT)
        start = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_START_ATT)
        end = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_END_ATT)
        score = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SCORE_ATT,
            False)
        peak_max = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_MAX_PEAK_ATT,
            False)
        peak_id = CommStruct.getAttribute(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_ID_ATT, False)
    except ParsingException as par_exce:
        raise ParsingException(
            "BedSeqAlignmentStatsCommStruct.getBEDSequence : Malformed BED Sequence - some attributes are not numbers. From:\n\t---> "
            + str(par_exce))
    # NOTE(review): the excerpt under review stops here; the code building
    # the sequence from these values is not visible -- confirm against the
    # complete file.
def getStatistics(statistics_node, comm_struct):
    """Copy the parameter children of `statistics_node` into `comm_struct.paramStatistics`.

    Raises:
        ParsingException: if a child does not carry the parameter tag.
    """
    for son_node in statistics_node:
        tag = son_node.tag.lower()
        if tag != BedSeqCommStruct.PARAM_TAG:
            raise ParsingException(
                "BedSeqCommStruct.getStatistics : The statistics contains an unauthorized element : '"
                + tag + "'")
        key = CommStruct.getAttribute(son_node,
                                      BedSeqCommStruct.PARAM_NAME_ATT)
        value = CommStruct.getAttribute(son_node,
                                        BedSeqCommStruct.PARAM_VALUE_ATT)
        comm_struct.paramStatistics[key] = value
def getCommStructFromXML(commstruct_filepath):
    """Open `commstruct_filepath`, parse it and fetch the root element.

    Raises:
        ParsingException: if an IOError occurs while opening, reading or
            closing the file.
    """
    file = None
    root_element = None
    try:
        file = open(commstruct_filepath, "r")
        try:
            tree = parse(file)
            root_element = tree.getroot()
        finally:
            # Close the file even when parsing fails.
            file.close()
    except IOError as io_exce:
        # The message parts are joined with '+': a ',' used to turn the
        # second half into a separate constructor argument.
        raise ParsingException(
            "MotifStatisticsCommStruct.getCommStructFromXML : Unable to open/close XML file '"
            + commstruct_filepath + "' : " + str(io_exce))
    # NOTE(review): the excerpt under review stops here; the code using
    # `root_element` is not visible -- confirm against the complete file.
def getPipelines(pipelines_filepath):
    """Reset the component rank, then parse `pipelines_filepath` and fetch its root element.

    Raises:
        ParsingException: if an IOError occurs while opening, reading or
            closing the file.
    """
    PipelineXMLParser.RANK = 0
    file = None
    root_element = None
    try:
        file = FileUtils.openFile(pipelines_filepath)
        try:
            tree = parse(file)
            root_element = tree.getroot()
        finally:
            # Close the file even when parsing fails.
            file.close()
    except IOError as io_exce:
        raise ParsingException(
            "PipelineXMLParser.getPipelines : unable to open/close XML file '"
            + pipelines_filepath + "'. From:\n\t---> " + str(io_exce))
    # NOTE(review): the excerpt under review stops here; the code building
    # the pipelines from `root_element` is not visible -- confirm against
    # the complete file.
def getMotifsDetailsFromJaspar():
    """Read motif names, ids, families, types and classes from the JASPAR flat files.

    MATRIX.txt must have at least 5 whitespace-separated columns per line:
    column 0 is the motif number, columns 2 and 3 joined by '.' form the
    motif name, and the remaining columns concatenated form its id.
    MATRIX_ANNOTATION.txt lines give a motif number, a key ("family",
    "class" or "type") and a value; lines for an unknown number are logged.

    Read and format errors are logged, not raised.
    """
    matrix_path = os.path.join(MotifUtils.JASPAR_FLAT_DB_PATH, "MATRIX.txt")
    matrix_annotation_path = os.path.join(MotifUtils.JASPAR_FLAT_DB_PATH,
                                          "MATRIX_ANNOTATION.txt")
    # The local names are kept as they are (although `id` and `type` hide
    # builtins) because code outside the visible excerpt may rely on them.
    names = {}
    id = {}
    family = {}
    type = {}
    classe = {}
    try:
        # Both files are opened before any parsing, as before; the context
        # managers close them on every exit path, errors included.
        with open(matrix_path, "r") as matrix_file:
            with open(matrix_annotation_path, "r") as matrix_annotation_file:
                for line in matrix_file:
                    tokens = line.split()
                    if len(tokens) < 5:
                        raise ParsingException(
                            "MotifUtils.getMotifsDetailsFromJaspar : Matrix file is not correctly formatted: 5 columns required while "
                            + str(len(tokens)) + " columns are found")
                    current_num = tokens[0]
                    current_name = tokens[2] + "." + tokens[3]
                    names[current_num] = current_name
                    id[current_name] = "".join(tokens[4:])

                for line in matrix_annotation_file:
                    tokens = line.split()
                    current_num = tokens[0]
                    if current_num not in names:
                        Log.log(
                            "MotifUtils.getMotifsDetailsFromJaspar : Motif number was not detected in matrix file : "
                            + current_num)
                        continue
                    current_key = tokens[1]
                    current_value = "".join(tokens[2:])
                    if current_key == "family":
                        family[names[current_num]] = current_value
                    elif current_key == "class":
                        classe[names[current_num]] = current_value
                    elif current_key == "type":
                        type[names[current_num]] = current_value
    except (IOError, ParsingException) as exce:
        Log.log(
            "MotifUtils.getMotifsDetailsFromJaspar : unable to read motifs definition. From:\n\t---> "
            + str(exce))
    # NOTE(review): the excerpt under review stops here; how the collected
    # dictionaries are handed back is not visible -- confirm against the
    # complete file.
def getMotif(node_motif, comm_struct):
    """Register the motif described by `node_motif`, with its statistics, in `comm_struct`.

    The motif is created with start 0, end 0, the name read from the node
    and no PWM; the consensus is set only when the attribute is present.

    Raises:
        ParsingException: if the name attribute is None.
    """
    motif_name = CommStruct.getAttribute(
        node_motif, MotifStatisticsCommStruct.MOTIF_NAME_ATT)
    motif_consensus = CommStruct.getAttribute(
        node_motif, MotifStatisticsCommStruct.MOTIF_CONSENSUS_ATT, False)

    if motif_name is None:
        raise ParsingException(
            "MotifStatisticsCommStruct.getAlignmentMotifs : The motif is missing required attribute 'name'"
        )

    motif = Motif(0, 0, motif_name, None)
    if motif_consensus is not None:
        motif.consensus = motif_consensus
    comm_struct.addMotif(motif)
    comm_struct.addMotifStatistics(
        motif,
        MotifStatisticsCommStruct.getMotifStatistics(node_motif, motif))
def getBEDSequenceDictionnary(species, bed_filepath, extension_5p,
                              extension_3p):
    """Parse `bed_filepath` and group its sequences by sequence key.

    Lines with fewer than three columns are ignored. Lines whose first
    column starts with '#' and lines whose start is not lower than their
    end are logged and skipped. Chromosome names are lower-cased and
    prefixed with "chr" when shorter than four characters. The start is
    decreased by `extension_5p` (never below 0) and the end increased by
    `extension_3p`.

    Returns:
        A dictionary mapping `BEDSequence.getKey()` to the list of
        BEDSequence objects having that key.

    Raises:
        ParsingException: if a coordinate is not an integer or the file
            cannot be read.
    """
    sequence_dic = {}
    try:
        # The context manager closes the file on every exit path.
        with open(bed_filepath) as input_file:
            for line in input_file:
                tokens = line.split()
                if len(tokens) <= BEDParser._endindex_col:
                    continue
                chrom = tokens[BEDParser._chrom_col].lower()
                if chrom[0:1] == "#":
                    Log.log("No 'chr' in line :" + line)
                    continue
                if len(chrom) < 4:
                    chrom = "chr" + chrom
                start = BEDParser.getTokenAsint(
                    tokens[BEDParser._startindex_col])
                end = BEDParser.getTokenAsint(tokens[BEDParser._endindex_col])
                if not start < end:
                    Log.log(
                        "BEDParser.getBEDSequenceDictionnary : A sequence has inversed start and end coordinates : "
                        + line)
                    continue
                start = max(start - extension_5p, 0)
                end = end + extension_3p
                bedsequence = BEDSequence(species, chrom, start, end)
                if len(tokens) > BEDParser._id_col:
                    bedsequence.id = tokens[BEDParser._id_col]
                sequence_dic.setdefault(bedsequence.getKey(),
                                        []).append(bedsequence)
    except ParsingException as par_exce:
        raise ParsingException(
            "BEDParser.getBEDSequenceDictionnary : Some attributes are not numbers. From:\n\t--> "
            + str(par_exce))
    except IOError as io_exce:
        # An unreadable file is reported as a ParsingException too.
        raise ParsingException(
            "BEDParser.getBEDSequenceDictionnary : Unable to open the file '"
            + bed_filepath + "'. From:\n\t--> " + str(io_exce))
    # The function name promises a dictionary; it was never returned.
    return sequence_dic
def getConfigFromFile(self):
    """Read the component configuration file into a dictionary.

    Every line must contain exactly one '='; the text before it is the key
    and the text after it, without its trailing newline, the value. Keys
    and values are not stripped of surrounding spaces.

    Returns:
        The dictionary, or None when an IOError occurs.

    Raises:
        ParsingException: if a line does not contain exactly one '='.
    """
    config = {}
    try:
        output_path = self.getConfigFilePath()
        config_file = FileUtils.openFile(output_path)
        try:
            for line in config_file:
                tokens = line.split("=")
                if len(tokens) != 2:
                    raise ParsingException(
                        "Component.getConfigFromFile : Wrongly formatted config file. Should have '<param_name> = <param_value>' instead of "
                        + line)
                value = tokens[1]
                # endswith() also copes with an empty value on a last line
                # without newline, where value[-1] raised an IndexError.
                if value.endswith("\n"):
                    value = value[:-1]
                config[tokens[0]] = value
        finally:
            # Close the file even when a malformed line aborts the loop.
            config_file.close()
    except IOError:
        return None
    return config
def toXMLFile(self, output_filepath):
    """Placeholder that always raises: inheriting classes must provide the implementation.

    Raises:
        ParsingException: always.
    """
    message = "The method toXMLFile must be implemented at the inherited class level"
    raise ParsingException(message)
) == BedSeqAlignmentStatsCommStruct.ALIGNMENT_TAG: seqalign = BedSeqAlignmentStatsCommStruct.getAlignment( align_node, bedseq, comm_struct) for sub_node in align_node: if sub_node.tag.lower( ) == BedSeqAlignmentStatsCommStruct.SEQUENCES_TAG: BedSeqAlignmentStatsCommStruct.getAlignmentSequences( sub_node, bedseq, seqalign) elif sub_node.tag.lower( ) == BedSeqAlignmentStatsCommStruct.MOTIFS_TAG: BedSeqAlignmentStatsCommStruct.getAlignmentMotifs( sub_node, bedseq, seqalign) else: raise ParsingException( "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : The alignment of '" + bedseq.toString() + "' contains an unauthorized element : '" + sub_node.tag.lower() + "'") else: raise ParsingException( "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : The BED Sequence '" + bedseq.toString() + "' contains an unauthorized element : '" + align_node.tag.lower() + "'") elif root_son.tag.lower( ) == BedSeqAlignmentStatsCommStruct.STATISTICS_TAG: BedSeqAlignmentStatsCommStruct.getStatistics( root_son, comm_struct) else: raise ParsingException( "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : The data contains an unauthorized element : '"