Example #1
    def get_column(table, index, start=None, end=None):

        # Index check
        if index >= len(table[0]):
            Logger.get_instance().error(" TableWrapper.get_column : the column index is greater than the number of columns in the table\n")
            raise ParsingFileException(" TableWrapper.get_column : the column index is greater than the number of columns in the table\n")
        if start is not None and end is not None:
            if start == end:
                Logger.get_instance().error(" TableWrapper.get_column : start and end indexes can't be equal\n")
                raise ParsingFileException(" TableWrapper.get_column : start and end indexes can't be equal\n")
            elif start > end:
                Logger.get_instance().error(" TableWrapper.get_column : start index can't be greater than end index\n")
                raise ParsingFileException(" TableWrapper.get_column : start index can't be greater than end index\n")
            else:
                Logger.get_instance().debug(" TableWrapper.get_column : start and end indexes are correct\n" + 'start: ' + str(start) + ', end: ' + str(end))
        elif start is not None and end is None:
            Logger.get_instance().debug(" TableWrapper.get_column : start index is correct\n" + 'start: ' + str(start))

        # Depending on the combination of index, start and end, the method returns
        # either the whole column or the requested slice of it
        # (slicing with None bounds covers all four start/end combinations)
        columns = list(zip(*table))
        return list(columns[index][start:end])
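
A minimal usage sketch for get_column; only the call itself comes from the snippet above, the import path is hypothetical:

    # Hypothetical import path; adjust to wherever TableWrapper lives in the project
    from tablewrapper import TableWrapper

    table = [['geneA', '10', '0.5'],
             ['geneB', '20', '0.7'],
             ['geneC', '30', '0.9']]

    # Whole second column, then only its first two rows
    print(TableWrapper.get_column(table, 1))        # ['10', '20', '30']
    print(TableWrapper.get_column(table, 1, 0, 2))  # ['10', '20']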
Example #2
    def make_dictionary(self):

        Logger.get_instance().info(
            " Creation of a dictionary for the novel genes of dataset 2."
            " The dictionary structure is : \n"
            " {gene : [ isoform1, isoform2,...isoformN]}")

        self.path_home = Constants.PATH_HOME
        self.path_input_file = self.path_home + PropertyManager.get_instance(
        ).get_property(DataConstants.DOWNLOAD_DICTIONARY_INPUT_FILE_PROPERTY,
                       True)

        self.dictionary_output_path = self.path_home + PropertyManager.get_instance(
        ).get_property(DataConstants.DOWNLOAD_DICTIONARY_OUTPUT_PATH_PROPERTY,
                       True)
        self.output_file_path = self.dictionary_output_path + PropertyManager.get_instance(
        ).get_property(DataConstants.DOWNLOAD_DICTIONARY_FILE_OUTPUT_PROPERTY,
                       True)

        dict_identifier = InfoFasta.make_dictionary(self.path_input_file)

        self.dict_file = FileUtils.open_text_w(self.output_file_path)

        pickle.dump(dict_identifier, self.dict_file)
        # Close the handle so the pickled dictionary is flushed to disk
        self.dict_file.close()

        Logger.get_instance().info(
            " The creation of a dictionary is completed \n\n")
Example #3
 def iupred_motifs(self):
     
     Logger.get_instance().info( "        .....Start of IUPred motifs analysis.....\n")
     
     self.iupred_folder = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_IUPRED_FOLDER_PROPERTY,True)
     
     # Iupred Analysis at threshold value of 0.4
     
     Timer.get_instance().step(" Start of IUPred motifs analysis - threshold = 0.4 \n")
     
     self.threshold_1 = Constants.MOTIFS_THRESHOLD_1
     self.output_folder_1 = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_IUP_OUTPUT_FOLDER_1_PROPERTY, True)
     
     GlobalOverlapRegionAnalysis.iupred_overlap_analysis(self.protein_list,self.iupred_folder, self.output_folder_1, self.threshold_1,
                                                         self.motif_folder,self.domain_region_file)
     
     Timer.get_instance().step(" End of IUPred motifs analysis - threshold = 0.4 \n")
     
     
     # Iupred Analysis at threshold value of 0.5
     
     Timer.get_instance().step(" Start of IUPred motifs analysis - threshold = 0.5 \n")
     self.threshold_2 = Constants.MOTIFS_THRESHOLD_2
     self.output_folder_2 = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_IUP_OUTPUT_FOLDER_2_PROPERTY, True)
     
     GlobalOverlapRegionAnalysis.iupred_overlap_analysis(self.protein_list,self.iupred_folder, self.output_folder_2, self.threshold_2,
                                                         self.motif_folder,self.domain_region_file)
     
     Timer.get_instance().step(" End of IUPred motifs analysis - threshold = 0.5 \n")
     
     Logger.get_instance().info( "        .....End of IUPred motifs analysis.....\n")
Example #4
    def change_header(path_input_file, path_output_file, source=1, type_id=1):

        file_input = FileUtils.open_text_r(path_input_file)
        seq_file = file_input.read()

        file_output = FileUtils.open_text_a(path_output_file)

        # Warning: check that the file has the '>' char only at the beginning of header lines and nowhere else,
        # otherwise the split will occur incorrectly!
        seq_file_list = seq_file.split('>')[1:]

        for seq in seq_file_list:
            lines = seq.split('\n')
            header = lines[0]
            Logger.get_instance().info(header)
            # Ensembl
            if source == 1:
                new_header = '>' + header.split('|')[2] + '\n'  # see Note
            # Uniprot
            elif source == 2:
                diff_header = header.split(' ')[0]
                # AC
                if type_id == 1:
                    new_header = '>' + diff_header.split('|')[1] + '\n'
                # ID
                elif type_id == 2:
                    new_header = '>' + diff_header.split('|')[2] + '\n'

            fasta = new_header + '\n'.join(lines[1:])

            file_output.write(fasta)

        file_input.close()
        file_output.close()
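
A hedged usage sketch; the header layout is only inferred from the split('|') logic above (with source=1 the third pipe-delimited field is kept) and the import is hypothetical:

    from headerutil import change_header   # hypothetical import; the method may live in a utility class

    # input.fasta headers of the form
    #   >ENSG00000000001|ENST00000000001|ENSP00000000001
    # are rewritten in output.fasta as
    #   >ENSP00000000001
    change_header('input.fasta', 'output.fasta', source=1)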
Example #5
    def overlap_file_write(table_tool, output_folder, protein, toolname):

        dict_filename = Constants.DICT_FILE_OUTPUT

        filename_slim_region = dict_filename[toolname][
            0] + protein + Constants.EXTENSION_TXT_FILE
        filename_domain_region = dict_filename[toolname][
            1] + protein + Constants.EXTENSION_TXT_FILE
        filepath_slim = output_folder + filename_slim_region
        filepath_domain = output_folder + filename_domain_region

        title_slim_region = [[
            Constants.SLIM_NAME, Constants.SLIM_REGION_COUNTER,
            Constants.START_SLIM_REGION, Constants.END_SLIM_REGION,
            Constants.TOOLNAME, Constants.TOOLNAME_REGION_COUNTER,
            Constants.START_TOOL_REGION, Constants.END_TOOL_REGION,
            Constants.OVERLAP_OUTCOME, Constants.OVERLAP_LENGTH
        ]]

        table_slim_region = title_slim_region + table_tool[0]
        table_domain_region = table_tool[1]

        if table_tool[0] != []:
            FileWriter.write_table(filepath_slim, table_slim_region)
        else:
            Logger.get_instance().debug("  The slim overlap file " +
                                        filename_slim_region +
                                        " has not been written")
        if table_tool[1] != []:
            FileWriter.write_table(filepath_domain, table_domain_region)
        else:
            Logger.get_instance().debug("  The domain overlap file " +
                                        filename_domain_region +
                                        " has not been written\n\n")
Example #6
    def disordp_region(directory, disordp_file, protname):
        

        filepath = directory + disordp_file 
        
        # the DisoRDPbind binding information is stored in a single file covering many proteins;
        # this selects the output block belonging to one protein only (protname)
        output_proteins = DisoRDPbind.output_reading(filepath)
        
        proteins = [ line.split('\n')[0] for line in output_proteins]
        if protname in proteins:
            prot_selected = [ line for line in output_proteins if line.split('\n')[0]==protname ]
            if 'WARNING:' in prot_selected[0]:
                Logger.get_instance().warning( " This protein contains >=10000 residues\
 (DisoRDPbind cannot predict the proteins with size >=10000) " + protname)
                disordp_region_table = []
            else:
                # extraction of the DisoRDPbind regions;
                # the information is stored in a table
                dict_info = DisoRDPbind.fraction_calculation(prot_selected[0])
                key_dictionary = Constants.KEY_DISORDP
                # Table containing the anchor regions 
                disordp_table = dict_info[key_dictionary]
                disordp_region_table = [ [int(line[2]), int(line[3])] for line in disordp_table]
            
        else:
            Logger.get_instance().warning(' This protein is not in the DisoRDPbind prediction \
(DisoRDPbind cannot predict the proteins with a size of 4 amino acids) ' + protname)
            disordp_region_table = []
            
        return disordp_region_table      
Example #7
    def dictionary_identifier(self):

        Logger.get_instance().info(
            " Creation of a dictionary for the novel genes of dataset 2."
            " The dictionary structure is : \n"
            " {gene : [ isoform1, isoform2,...isoformN]}")

        self.ensembl_path_output = Constants.PATH_HOME + PropertyManager.get_instance(
        ).get_property(DataConstants.ENSEMBL_OUTPUT_PATH_SEQUENCE_PROPERTY,
                       True)
        self.ensembl_output_dataset2 = self.ensembl_path_output + PropertyManager.get_instance(
        ).get_property(DataConstants.ENSEMBL_FILE_SEQUENCES_2_PROPERTY, True)

        self.dictionary_output = Constants.PATH_HOME + PropertyManager.get_instance(
        ).get_property(DataConstants.DICTIONARY_PATH_OUTPUT_PROPERTY, True)
        self.dictionary_namefile = self.dictionary_output + PropertyManager.get_instance(
        ).get_property(DataConstants.DICTIONARY_NAME_FILE_PROPERTY, True)

        dict_identifier = InfoFasta.make_dictionary(
            self.ensembl_output_dataset2)

        file_dict = FileUtils.open_text_w(self.dictionary_namefile)

        pickle.dump(dict_identifier, file_dict)
        # Close the handle so the pickled dictionary is flushed to disk
        file_dict.close()

        Logger.get_instance().info(
            " The creation of a dictionary for novel gene in dataset 2 is completed \n\n"
        )
Example #8
    def read_properties(self, file_path):

        Logger.get_instance().info("Reading properties from file : " +
                                   file_path)
        config_parser = ConfigParser()

        config_parser.read(file_path)

        for section in config_parser.sections():
            options = config_parser.options(section)
            for option in options:

                try:
                    option = option.lower()
                    self.propertiesDict[option] = config_parser.get(
                        section, option)
                except Exception:
                    raise RbpmotifException(
                        "PropertyManager.readProperties : Abnormal property definition for key = "
                        + option + " in section " + section + " of file " +
                        file_path)
                # Report an empty/missing value with a dedicated message
                if self.propertiesDict[option] is None:
                    raise RbpmotifException(
                        "PropertyManager.readProperties : Wrong property definition for key = "
                        + option + " in section " + section + " of file " +
                        file_path)
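
A short sketch of the expected .ini layout and how options land in propertiesDict; the file content and key name are illustrative only, while get_instance() and propertiesDict come from the snippets in this listing:

    # properties.ini (illustrative)
    # [analysis]
    # motifs_dataset_type = dataset2

    manager = PropertyManager.get_instance()
    manager.read_properties('properties.ini')
    # Option names are lowercased before being stored
    print(manager.propertiesDict['motifs_dataset_type'])   # 'dataset2'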
Example #9
    def output_reading(filename):

        input_file = FileUtils.open_text_r(filename)

        text_file = []

        lines = input_file.readlines()
        string = ''
        for n, line in enumerate(lines):
            if line[0:1] == '>' and n == 0:
                string += line[1:]
            elif line[0:1] != '>' and n != 0:
                string += line
            elif line[0:1] == '>' and n != 0:
                # append in string format the output of one protein
                text_file.append(string)
                # reset the string variable and add the header
                string = ''
                string += line[1:]
            else:
                Logger.get_instance().warning(' Check this line : ' + line)

        text_file.append(string)

        return text_file
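
A small sketch of how output_reading splits a multi-protein prediction file into per-protein text blocks; the file content is invented for illustration and the DisoRDPbind class itself comes from the other snippets in this listing:

    # predictions.txt (illustrative content):
    #   >PROT1
    #   0.12 0.54 0.31
    #   >PROT2
    #   0.88 0.10

    blocks = DisoRDPbind.output_reading('predictions.txt')
    print(len(blocks))    # 2
    print(blocks[0])      # 'PROT1\n0.12 0.54 0.31\n'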
Example #10
    def delete_file(namefile):

        try:
            os.remove(namefile)
            Logger.get_instance().info(" This file has been removed: " +
                                       namefile)
        except OSError:
            Logger.get_instance().info(" Cannot remove : " + namefile)
Example #11
 def stop_chrono(self, message):
     
     current_time = time.time()
     step_duration = current_time - self.lastTime
     total_duration = current_time - self.start_time
     Logger.get_instance().info ( "Step duration : " + Timer.format_duration( step_duration))
     Logger.get_instance().info ( "\n\nSTOP CHRONO : " + message + ". Total duration " + Timer.format_duration(total_duration))
     self.lastTime = 0
     self.start_time = 0
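
A tiny usage sketch of the chrono pattern that the whole_procedure snippets in this listing follow; the timed work is a placeholder and the import path is hypothetical:

    import time
    from util.timer import Timer   # hypothetical import path

    timer = Timer.get_instance()
    timer.start_chrono()
    time.sleep(0.1)                          # placeholder for a real analysis step
    timer.step(" End of the first step \n")
    timer.stop_chrono(" End of the illustrative run")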
Example #12
 def del_column(table, index):
     if index >= len(table[0]):
         Logger.get_instance().error(" TableWrapper.del_column : the column index is greater than the number of columns in the table\n")
         raise ParsingFileException(" TableWrapper.del_column : the column index is greater than the number of columns in the table\n")
     new_table = []
     for row in table:
         # build a new row instead of popping in place, so the input table is not modified
         new_table.append(row[:index] + row[index + 1:])
     return new_table
Example #13
    def open_text_a(path):
        try:
            file_handle = open(path, 'a')
        except IOError as detail:
            Logger.get_instance().critical\
            ("IOError:  Unable to open " + path + " : " + str(detail))
            exit()

        return file_handle
Example #14
    def open_text_r(path):
        try:
            file_handle = open(path, 'r')
        except IOError as ioe:
            Logger.get_instance().critical\
            ("FileUtils.open_text_r : IOError:  Unable to open '" + path + "' : " + str(ioe))
            raise RbpmotifException(
                "Unable to open file '" + path + "' : " + str(ioe), ioe)

        return file_handle
Example #15
    def make_iupred_file(input_path, output_path, th_1, th_2, num_aa, dataset_type):
        
        
        # initialization of file names
        file_name_1 = dataset_type + '_IupredTable' + '_t1_'+ str(th_1) + '_t2_' + str(th_2) + '.txt'
        file_name_2 = dataset_type + '_IupredRegion_' + str(th_1)  + '.txt'
        file_name_3 = dataset_type + '_IupredRegion_' + str(th_2)  + '.txt'
        
        
        num_aa_string = '('+ str(num_aa) +' AA)'
        
        # Files opening and title string writing
        file_1 = FileUtils.open_text_a(output_path + file_name_1)
        file_2 = FileUtils.open_text_a(output_path + file_name_2)
        file_3 = FileUtils.open_text_a(output_path + file_name_3)
        header_file_table = ['Protein', 'Fraction '+ str(th_1), 'Fraction ' + str(th_2), 'Region N.' +  num_aa_string +'th_'+ str(th_1) , 'Region N.'+ num_aa_string +'th_'+ str(th_2)]
        header_file_region = ['Protein', 'N', 'Start', 'End',  'Region length']
        
        header_string_table = '\t'.join(header_file_table)
        header_string_region = '\t'.join(header_file_region)
        
        file_1.write(header_string_table + '\n')
        file_2.write(header_string_region + '\n')
        file_3.write(header_string_region + '\n')
        
        # Retrieve the file names of the protein predictions to analyze
        list_file = subp.check_output(['ls', input_path]).decode()
        list_file = list_file.split('\n')
        if '' in list_file:
            list_file.remove('')

        # This section runs the iupred_string_info method (which also calls the iupred_info method)
        # for each protein file in list_file and appends the results to the output files
        # in tab-separated format
        for i, pred_file in enumerate(list_file, 1):
            prot_id = pred_file.split('.')[0].split('_')[1]
            Logger.get_instance().info( str(i) + ' ' + prot_id)
            namefile = input_path + pred_file
            out_string = Iupred.iupred_string_info(namefile, prot_id, th_1, th_2, num_aa)
            
            string_file_1 = out_string[0]
            string_file_2 = out_string[1]
            string_file_3 = out_string[2]
            
            file_1.write(string_file_1 + '\n')
            file_2.write(string_file_2 + '\n')
            file_3.write(string_file_3 + '\n')
            
            
        file_1.close()
        file_2.close()
        file_3.close()
Example #16
    def check_length(feature, expected_length=None):

        length = len(feature)
        if expected_length is not None:
            if length == expected_length:
                Logger.get_instance().info(
                    " The length of the dataset feature is the one you expected\n")
            else:
                Logger.get_instance().info(
                    " The length of the dataset feature is not the one you expected\n")
        return length
Example #17
    def domain_one_protein(domain_region_file, protname):

        file_domain = FileUtils.open_text_r(domain_region_file)

        # Load the domain dictionary from the pickle file
        dict_domain = pickle.load(file_domain)
        file_domain.close()

        if protname in dict_domain:
            domain_prot = dict_domain[protname]
            return domain_prot
        else:
            Logger.get_instance().debug(" Protein without domains " + protname)
            return []
Example #18
    def global_anchor_analysis(file_motifs, input_folder, output_path):

        # Description of execution
        Logger.get_instance().info(' Start of Anchor Analysis')

        # The list file is provided calling a unix command
        try:
            LIST_FILE = subp.check_output(['ls', input_folder]).decode()
            LIST_FILE = LIST_FILE.split('\n')
            if '' in LIST_FILE:
                LIST_FILE.remove('')
            # Run the Anchor tool on every fasta file in the folder
            for fastafile in LIST_FILE:
                prot = fastafile.split('.fasta')[0]
                Logger.get_instance().info(' Anchor Analysis: ' + prot)
                file_input = input_folder + fastafile
                # Anchor tool
                A = Anchor(output_path)
                A.anchor_analysis(file_input, file_motifs, prot)
        except CalledProcessError as cpe:
            Logger.get_instance().error(
                'Anchor.global_anchor_analysis: Unable to execute listing of files in '
                + input_folder)
            raise RbpmotifException(
                'Anchor.global_anchor_analysis: Unable to execute listing of files in '
                + input_folder, cpe)

        Logger.get_instance().info(" End of Anchor Analysis")
Example #19
 def whole_procedure():
         
     dataset_type = PropertyManager.get_instance().get_property( DataConstants.MOTIFS_DATASET_TYPE_PROPERTY, True)
     
     # start chrono
     Timer.get_instance().start_chrono()
     Logger.get_instance().info("        ........Start of " + dataset_type + " Motifs Analysis.....\n ")
     
     motifs = MotifsAnalysis()
     motifs.iupred_motifs()
     motifs.anchor_motifs()
     motifs.disordp_motifs()
     
     Timer.get_instance().stop_chrono(" End of " + dataset_type + " Motifs Analysis")
Example #20
    def isoform_sequences(self):

        Logger.get_instance().info(
            " Starting the random selection of isoforms with the same length \n")
        Logger.get_instance().info(
            " The following headers are the randomly selected proteins \n")

        self.path_output_longest = Constants.PATH_HOME + PropertyManager.get_instance(
        ).get_property(DataConstants.LONGEST_PATH_OUTPUT_PROPERTY, True)

        self.path_file_isoform = self.path_output_longest + PropertyManager.get_instance(
        ).get_property(DataConstants.ISOFORM_FILE_PROPERTY, True)
        self.path_file_selected_isoform = self.path_output_longest + PropertyManager.get_instance(
        ).get_property(DataConstants.RANDOM_ISOFORM_SEQ_PROPERTY, True)

        # The headers of the isoform fasta file are taken by the InfoFasta class
        # Make sure that the text argument is False because the input object is a file and not a list

        self.headers = InfoFasta.get_header(self.path_file_isoform, text=False)

        # Extraction of genes from the header lines
        # This list contains duplicated genes because the file stores several isoforms of the same gene

        gene_isoform = []
        for header in self.headers:
            gene = header[1:16]
            gene_isoform.append(gene)

        # Group the headers by gene, preserving the association between
        # each gene identifier and all of its isoform headers
        header_by_gene = {}
        for gene, header in zip(gene_isoform, self.headers):
            header_by_gene.setdefault(gene, []).append(header)

        # For each gene, select one isoform header at random
        random_header = []
        for gene in header_by_gene:
            random_header.append(random.choice(header_by_gene[gene]))

        self.file_random_seq = FileUtils.open_text_a(
            self.path_file_selected_isoform)

        # The sequences corresponding to the selected headers are extracted from the isoform file

        for header in random_header:
            Logger.get_instance().info('Header selected : ' + header)
            identifier = header[33:48]
            sequence = InfoFasta.get_seq(self.path_file_isoform, identifier)
            fasta_seq = SeqToFasta.give_fasta(header, sequence)
            self.file_random_seq.write(fasta_seq)

        Logger.get_instance().info(" End of the random sequence selection \n ")
Example #21
 def make_dictionary(namefile):
     headers = InfoFasta.get_header(namefile)
     dict_header = {}
     for line in headers:
         line = line[1:]
         ids = line.split('|')
         # Headers with at least three pipe-delimited fields carry a protein/isoform
         # identifier in ids[2]; shorter headers have no protein sequence available
         if len(ids) >= 3:
             if ids[0] in dict_header:
                 dict_header[ids[0]].append(ids[2])
             else:
                 dict_header[ids[0]] = [ids[2]]
         else:
             Logger.get_instance().info( " This gene has a protein sequence unavailable : " + line)

     return dict_header
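
A sketch of the header layout this method expects and the dictionary it builds; the FASTA content is illustrative and InfoFasta.get_header is assumed to return the '>' header lines of the file:

    # isoforms.fasta (illustrative) with gene|transcript|protein headers:
    #   >ENSG01|ENST01|ENSP01
    #   >ENSG01|ENST02|ENSP02
    #   >ENSG02|ENST03          (no protein identifier: logged and skipped)

    isoform_dict = InfoFasta.make_dictionary('isoforms.fasta')
    print(isoform_dict)          # {'ENSG01': ['ENSP01', 'ENSP02']}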
Example #22
 def anchor_motifs(self):
     
     Logger.get_instance().info( "        .....Start of ANCHOR motifs analysis.....\n")
     
     self.anchor_folder = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_ANCHOR_FOLDER_PROPERTY, True)
     self.anchor_output_folder = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_ANCHOR_OUTPUT_FOLDER_PROPERTY, True)
     
     Timer.get_instance().step(" Start of ANCHOR motifs analysis \n")
     
     GlobalOverlapRegionAnalysis.anchor_overlap_analysis(self.protein_list,self.anchor_folder,self.anchor_output_folder,
                                                         self.motif_folder,self.domain_region_file)
     
      
     Timer.get_instance().step(" End of ANCHOR motifs analysis \n")
     
     Logger.get_instance().info( "        .....End of ANCHOR motifs analysis.....\n")
Example #23
    def whole_procedure():

        # start chrono
        Timer.get_instance().start_chrono()
        Logger.get_instance().info("Start of the sequences extraction.....\n ")

        D = DownloadEnsemblSeq()

        #D.download_product_gene_seq()

        #D.make_dictionary()
        #D.get_longest_seq()
        #D.isoform_sequences()
        #D.merger_sequences()

        Timer.get_instance().stop_chrono(' End of the sequences extraction')
Example #24
 def split_dataset(self):
         
         
     Logger.get_instance().info( " Division of the dataset into many fasta files, each containing one protein sequence")
     
     self.path_home = Constants.PATH_HOME
         
     self.split_path_input = self.path_home + PropertyManager.get_instance().get_property( DataConstants.SPLIT_PATH_INPUT_PROPERTY, True)
     self.split_file_fasta = self.split_path_input + PropertyManager.get_instance().get_property( DataConstants.SPLIT_DATASET_PROPERTY, True)
     self.split_path_output = Constants.PATH_HOME + PropertyManager.get_instance().get_property( DataConstants.SPLIT_PATH_OUTPUT_PROPERTY, True)
     self.split_start_index = PropertyManager.get_instance().get_property( DataConstants.SPLIT_START_HEADER_PROPERTY, True)
     self.split_end_index = PropertyManager.get_instance().get_property( DataConstants.SPLIT_END_HEADER_PROPERTY, True)
     
     SplitSeq.split_seq( self.split_file_fasta, self.split_path_output, int(self.split_start_index), int(self.split_end_index))
     
         
     Logger.get_instance().info( " The dataset has been split into many fasta files ")
Example #25
 def whole_procedure():
         
     
     # start chrono
     Timer.get_instance().start_chrono()
     Logger.get_instance().info("Start of Disorder Analysis.....\n ")
        
     disorder = DisorderAnalysis()
     #disorder.change_header()
     #disorder.split_dataset()
     #disorder.anchor_analysis()
     #disorder.iupred_analysis()
     #disorder.analysis_tools_output()
     #disorder.disordpbind_analysis()
     #disorder.particular_analysis()
         
     Timer.get_instance().stop_chrono(' End of Disorder Analysis')
Example #26
    def make_disordp_file(input_path, output_path, binding_partner, num_aa,
                          dataset_type):

        # initialization of file names
        file_name_1 = dataset_type + '_DisoRDPbindTable.txt'
        file_name_2 = dataset_type + '_DisoRDPbindRegion.txt'

        num_aa_string = '(' + str(num_aa) + ' AA)'

        # Files opening and title string writing
        file_1 = FileUtils.open_text_a(output_path + file_name_1)
        file_2 = FileUtils.open_text_a(output_path + file_name_2)
        header_file_table = [
            'Protein', 'Fraction ', 'Region N.' + num_aa_string
        ]
        header_file_region = ['Protein', 'N', 'Start', 'End', 'Region length']

        header_string_table = '\t'.join(header_file_table)
        header_file_region = '\t'.join(header_file_region)

        file_1.write(header_string_table + '\n')
        file_2.write(header_file_region + '\n')

        # Reading of DisoRDPbind output file
        protein_output_list = DisoRDPbind.output_reading(input_path)

        for n, output in enumerate(protein_output_list):
            if 'WARNING:' in output:
                prot = output.split('\n')[0]
                Logger.get_instance().warning(
                    str(n + 1) + "\n This protein contains >=10000 residues\
 (DisoRDPbind cannot predict the proteins with size >=10000) " + prot)
            else:
                Logger.get_instance().info(str(n + 1))
                results = DisoRDPbind.disordp_string_info(
                    output, binding_partner, num_aa)

                string_file_1 = results[0]
                string_file_2 = results[1]

                file_1.write(string_file_1 + '\n')
                file_2.write(string_file_2 + '\n')

        file_1.close()
        file_2.close()
Example #27
    def disordp_motifs(self):
        
        Logger.get_instance().info( "        .....Start of DisoRDPbind motifs analysis.....\n")
        
        self.disordp_folder = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_DISORDP_FOLDER_PROPERTY, True)
        self.disordp_output_folder = self.path_home +  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_DISORDP_OUTPUT_FOLDER_PROPERTY, True)
        self.filename =  PropertyManager.get_instance().get_property( DataConstants.MOTIFS_DISORDP_FILE_PROPERTY,True)
        
        Timer.get_instance().step(" Start of DisoRDPbind motifs analysis \n")
        
        GlobalOverlapRegionAnalysis.disordp_overlap_analysis(self.protein_list, self.disordp_folder, self.filename, self.motif_folder,
                                                             self.domain_region_file,self.disordp_output_folder)
        
        
        
        Timer.get_instance().step(" End of DisoRDPbind motifs analysis \n")

        Logger.get_instance().info( "        .....End of DisoRDPbind motifs analysis.....\n")
Example #28
 def inv_column(table, index1, index2):
     if index1 >= len(table[0]) or index2 >= len(table[0]):
         Logger.get_instance().error(" TableWrapper.inv_column : the column index is greater than the number of columns in the table\n")
         raise ParsingFileException(" TableWrapper.inv_column : the column index is greater than the number of columns in the table\n")
     new_columns = []
     new_table = []
     num = len(table[0])
     for i in range(num):
         if i != index1 and i != index2:                
             new_columns.append(TableWrapper.get_column(table,i))
         elif i == index1:
             new_columns.append(TableWrapper.get_column(table, index2))
         elif i == index2:
             new_columns.append(TableWrapper.get_column(table, index1))
     for item in zip(*new_columns):
         item = list(item)
         new_table.append(item)
     return new_table
Example #29
    def anchor_overlap_analysis(protein_list, anchor_folder, output_folder,
                                motif_folder, domain_region_file):

        toolname = Constants.ANCHOR_TOOL

        for num, protein in enumerate(protein_list):
            prot_counter = num + 1
            result = math.fmod(prot_counter, 100)
            if result == 0.0:
                Logger.get_instance().info('        ' + str(prot_counter) +
                                           "th  Protein analyzed: " + protein)
            # This line takes the anchor region of a protein
            anchor_region = OverlapMotifsRegions.anchor_region(
                anchor_folder, protein)
            table_anchor = OverlapMotifsRegions.overlap_analysis(
                protein, anchor_region, toolname, motif_folder,
                domain_region_file)
            # table Writing
            GlobalOverlapRegionAnalysis.overlap_file_write(
                table_anchor, output_folder, protein, toolname)
Example #30
    def download_product_gene_seq(self):

        Logger.get_instance().info(
            " Start of sequences download from Ensembl..")

        self.path_home = Constants.PATH_HOME
        self.gene_list_input = self.path_home + PropertyManager.get_instance(
        ).get_property(DataConstants.DOWNLOAD_ENSEMBL_FILE_INPUT_LIST_PROPERTY,
                       True)
        self.ensembl_seq_output = self.path_home + PropertyManager.get_instance(
        ).get_property(DataConstants.DOWNLOAD_ENSEMBL_FILE_OUPUT_SEQ_PROPERTY,
                       True)
        self.type_query = PropertyManager.get_instance().get_property(
            DataConstants.DOWNLOAD_ENSEMBL_TYPE_QUERY_PROPERTY, True)

        Ensembl.list_get_seq(self.gene_list_input,
                             int(self.type_query),
                             path_output=self.ensembl_seq_output)

        Logger.get_instance().info(" End of sequences download from Ensembl..")