Exemplo n.º 1
0
    def protocol(self):
        """Main entrypoint for the protocol.

        Opens a timestamped ``<prefix>_<date>-<time>_output.txt`` log file in
        ``self.directory``, writes ``self.pact_preamble`` to it, runs the
        required combine_pact step and, when
        ``self.dict_workflow['pdb_import']`` is truthy, the pdb_import step.

        The process is terminated with ``quit()`` (SystemExit) when a required
        config section is missing or when an analysis module cannot be
        imported.

        Returns:
            None
        """

        #Create the output log file for this run. A single strftime call is
        #used so the date and time parts come from the same instant.
        with open(self.directory + self.output_prefix + "_" +
                  strftime("%m_%d_%Y-%H_%M_%S") + '_output.txt', 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            #*****************************************
            #Pact Combine Section (Required)
            #*****************************************

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):
                print("[Protocols:Epitope Mapping Error] The combinepact config file is incorrect.")
                print("[Protocols:Epitope Mapping Error] There is something wrong with the [combinepact] section.")
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print("[Protocols:Epitope Mapping Error] combine_pact was not found.")
                #Stop here: without the class the next line would only fail
                #with an unhelpful UnboundLocalError
                quit()

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()

            #*****************************************
            #PDB Import Section
            #*****************************************

            #Only import and run if selected
            if self.dict_workflow['pdb_import']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('pdb_import'):
                    print("[Protocols:Epitope Mapping Error] The pdb_import config file is incorrect.")
                    print("[Protocols:Epitope Mapping Error] There is something wrong with the [pdb_import] section.")
                    quit()

                #Import our pdb_import class
                try:
                    from pact.analysis.pdb_import import pdb_import
                except ImportError:
                    print("[Protocols:Epitope Mapping Error] pdb_import was not found.")
                    #Same reasoning as above: do not continue without the class
                    quit()

                #Create the object then call the importer
                obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #The dict will be like {'pdb name': {data...
                dict_pdb = obj_pdb.pdb_import()

        return
Exemplo n.º 2
0
    def protocol(self):
        """Provide a protocol that does general analyses that don't need a full protocol.

        Steps, in order:
          1. combine_pact (required, needs the [combinepact] config section).
          2. pdb_import, only when ``self.dict_workflow['pdb_import']`` is truthy.
          3. classifier_color, only when
             ``self.dict_workflow['classifier_color']`` is truthy. Colors are
             only computed when the first comma-separated entry of the
             ``classifier`` option is "pdb".
          4. set_vs_set (required, needs the [setvsset] config section); its
             return value is written to the log file.

        The process is terminated with ``quit()`` (SystemExit) when a required
        config section is missing, when an analysis module cannot be imported,
        or when the "pdb" classifier is requested without the pdb_import step.

        Returns:
            None
        """

        #Create the output log file for this run. A single strftime call is
        #used so the date and time parts come from the same instant.
        with open(self.directory + self.output_prefix + "_" +
                  strftime("%m_%d_%Y-%H_%M_%S") + '_output.txt', 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            #*****************************************
            #Pact Combine (Required)
            #*****************************************

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):
                print("[Protocols:Analysis Error] The combinepact config file is incorrect.")
                print("[Protocols:Analysis Error] There is something wrong with the [combinepact] section.")
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print("[Protocols:Analysis Error] combine_pact was not found.")
                #Stop here: continuing would only fail on the unbound name
                quit()

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

            #Print Section Progress
            print("[Protocols:Analysis] Combine PACT")

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()

            #*****************************************
            #PDB Import Section
            #*****************************************

            #Bound up front so the later sections can tell if the step ran
            dict_pdb = None
            pdb_imported = False

            #Only import and run if selected
            if self.dict_workflow['pdb_import']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('pdb_import'):
                    print("[Protocols:Analysis Error] The pdb_import config file is incorrect.")
                    print("[Protocols:Analysis Error] There is something wrong with the [pdb_import] section.")
                    quit()

                #Import our pdb_import class
                try:
                    from pact.analysis.pdb_import import pdb_import
                except ImportError:
                    print("[Protocols:Analysis Error] pdb_import was not found.")
                    quit()

                #Create the object then call the importer
                obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #Print Section Progress
                print("[Protocols:Analysis] PDB Import")

                #The dict will be like {'pdb name': {data...
                dict_pdb = obj_pdb.pdb_import()
                pdb_imported = True

            #*****************************************
            #Assign colors to classifiers
            #*****************************************

            #Only set when colors were actually computed below
            dict_custom_color = None
            have_custom_color = False

            #Only import and run if selected
            if self.dict_workflow['classifier_color']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('classifier_color'):
                    print("[Protocols:Analysis Error] The classifier_color config file is incorrect.")
                    print("[Protocols:Analysis Error] There is something wrong with the [classifier_color] section.")
                    quit()

                #Import our classifier_color class
                try:
                    from pact.analysis.classifier_color import classifier_color
                except ImportError:
                    print("[Protocols:Analysis Error] classifier_color was not found.")
                    quit()

                #Create the object
                obj_classcolor = classifier_color(self.obj_cfgparser, self.dict_programs, {})

                #Print Section Progress
                print("[Protocols:Analysis] Classifier Color")

                #This section returns a dict of [loc][mut] = "color"
                if self.obj_cfgparser.get("classifier_color", "classifier").split(',')[0] == "pdb":

                    #The pdb classifier consumes the PDB import result
                    if not pdb_imported:
                        print("[Protocols:Analysis Error] The pdb classifier requires the pdb_import workflow step.")
                        quit()

                    dict_custom_color = obj_classcolor.classifier_color(dict_merged_datasets, dict_pdb, "pdb")
                    have_custom_color = True

            #*****************************************
            #Set vs Set Section
            #*****************************************

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('setvsset'):
                print("[Protocols:Analysis Error] The setvsset config file is incorrect.")
                print("[Protocols:Analysis Error] There is something wrong with the [setvsset] section.")
                quit()

            #Import our setvsset class
            try:
                from pact.analysis.set_vs_set import set_vs_set
            except ImportError:
                print("[Protocols:Analysis Error] set_vs_set was not found.")
                quit()

            #Create the object
            obj_svs = set_vs_set(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

            print("[Protocols:Analysis] Dataset vs Dataset")

            #Pass the colors only when they were computed; previously a
            #non-"pdb" classifier left dict_custom_color unbound here
            if have_custom_color:
                file_output.write(obj_svs.set_vs_set(dict_merged_datasets, dict_custom_color))
            else:
                file_output.write(obj_svs.set_vs_set(dict_merged_datasets))

        return
Exemplo n.º 3
0
    def protocol(self):
        """Main entrypoint for the homology protocol.

        Opens a timestamped ``<prefix>_<date>-<time>_output.txt`` log file in
        ``self.directory``, writes ``self.pact_preamble`` to it and then runs
        each section whose key in ``self.dict_workflow`` is truthy, in order:
        ``blastp_align_filter``, ``pssm``, ``site_frequencies``,
        ``pssm_reader``, ``combinepact`` and ``analysis_sitefitness_homology``.

        The analysis section needs the merged datasets produced by the
        ``combinepact`` section, so it refuses to run without it.

        The process is terminated with ``quit()`` (SystemExit) when an
        analysis module cannot be imported, when the [combinepact] config
        section is missing, or when the analysis is requested without the
        combinepact step.

        Returns:
            None
        """

        #Create the output log file for this run. A single strftime call is
        #used so the date and time parts come from the same instant.
        with open(
                self.directory + self.output_prefix + "_" +
                strftime("%m_%d_%Y-%H_%M_%S") + '_output.txt',
                'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            #Import our class
            try:
                from pact.analysis.sequence.homology_pssm import homology_classifier
            except ImportError:
                print(
                    "[Protocols:Homology Error] pact.analysis.sequence.homology_pssm was not found."
                )
                quit()

            #Create our object
            obj_homology = homology_classifier(self.obj_cfgparser,
                                               self.dict_programs,
                                               {'directory': self.directory})

            #*****************************************
            #DNA Filtering/Alignment Section
            #*****************************************
            if self.dict_workflow['blastp_align_filter']:

                #Convert our XML file
                print("[Protocols:Homology] xml_to_fasta")
                file_output.write("[Protocols:Homology] xml_to_fasta\n")
                obj_homology.xml_to_fasta()

                #Run CD-HIT on our new fasta file
                print("[Protocols:Homology] cdhit")
                file_output.write("[Protocols:Homology] cdhit\n")

                #Check to see if the number of processes is logical
                self.processes = self.obj_cfgparser.get(
                    "blastp_align_filter", "processes")
                if int(self.processes) <= 0:
                    self.processes = "2"

                check_output([
                    self.dict_programs['cdhit'], "-i",
                    self.directory + self.output_prefix + ".fa", "-o",
                    self.directory + self.output_prefix + ".afa", "-c",
                    str(
                        self.obj_cfgparser.get("blastp_align_filter",
                                               "cdhit_clustering_threshold")),
                    "-M", "40000", "-T",
                    str(self.processes)
                ])

                #Check to see if we have WT in our cdhit output
                print("[Protocols:Homology] cdhit_wtcheck")
                file_output.write("[Protocols:Homology] cdhit_wtcheck\n")
                obj_homology.cdhit_wt_check()

                #Run MUSCLE on our new fasta file
                print("[Protocols:Homology] muscle")
                file_output.write("[Protocols:Homology] muscle\n")
                check_output([
                    self.dict_programs['muscle'], "-in",
                    self.directory + self.output_prefix + ".afa", "-out",
                    self.directory + self.output_prefix + ".msa"
                ])

                #Process our MSA (needs to be on for PSIBlast)
                print("[Protocols:Homology] processmsa")
                file_output.write("[Protocols:Homology] processmsa\n")
                list_msa = obj_homology.process_msa()

                #Save our list
                print("[Protocols:Homology] Saving our MSA")
                file_output.write("[Protocols:Homology] Saving our MSA\n")
                save_pact_file(
                    list_msa,
                    self.directory + self.output_prefix + '_' + "list_msa")

            #*****************************************
            #PSSM Section
            #*****************************************
            if self.dict_workflow['pssm']:
                #Open our list
                print("[Protocols:Homology] Opening our MSA")
                file_output.write("[Protocols:Homology] Opening our MSA\n")
                list_msa = open_pact_file(self.directory + self.output_prefix +
                                          '_' + "list_msa")

                #Split our msa for PSIBlast (needs to be on for PSIBlast)
                print("[Protocols:Homology] msa_split")
                file_output.write("[Protocols:Homology] msa_split\n")
                list_pbcmds = obj_homology.msa_split(list_msa)

                #Run PSIBlast
                print("[Protocols:Homology] psiblast")
                file_output.write("[Protocols:Homology] psiblast\n")
                for command in list_pbcmds:
                    check_output([self.dict_programs['psiblast'], *command])

                #Import our PSSM data
                print("[Protocols:Homology] pssm_file_import")
                file_output.write("[Protocols:Homology] pssm_file_import\n")
                dict_pssm = obj_homology.pssm_file_import()

                #Save our heatmap
                print("[Protocols:Homology] Saving a PSSM .csv heatmap")
                file_output.write(
                    obj_homology.pssm_output_heat(dict_pssm) + "\n")

                #Save our csv
                print("[Protocols:Homology] Saving a PSSM .csv column data")
                file_output.write(
                    obj_homology.pssm_output_csv(dict_pssm) + "\n")

                #Save our PACT File
                print("[Protocols:Homology] Saving a PSSM .pact file")
                file_output.write(
                    save_pact_file(
                        dict_pssm, self.directory + self.output_prefix + '_' +
                        "PSSM") + "\n")

            #*****************************************
            #Sitewise Frequency Section
            #*****************************************
            if self.dict_workflow['site_frequencies']:
                #Open our list
                print("[Protocols:Homology] Opening our MSA")
                file_output.write("[Protocols:Homology] Opening our MSA\n")
                list_msa = open_pact_file(self.directory + self.output_prefix +
                                          '_' + "list_msa")

                #Calculate our frequencies
                print("[Protocols:Homology] Calculate our frequencies")
                file_output.write(
                    "[Protocols:Homology] Calculate our frequencies\n")
                dict_freq = obj_homology.msa_freq(list_msa)

                #Save our CSV heatmap
                print("[Protocols:Homology] Saving the frequencies heatmap")
                file_output.write(
                    "[Protocols:Homology] Saving the frequencies heatmap\n")
                obj_homology.freq_output_heat(dict_freq)

                #Save our PACT File
                print("[Protocols:Homology] Saving a Freq .pact file")
                file_output.write(
                    save_pact_file(
                        dict_freq, self.directory + self.output_prefix + '_' +
                        "freq") + "\n")

            #*****************************************
            #Read stored .pact files
            #*****************************************
            if self.dict_workflow['pssm_reader']:
                #NOTE(review): str_protocol_name is not defined in this
                #method; presumably a module-level constant - TODO confirm

                #Open our PACT File
                print("[Protocols:" + str_protocol_name +
                      "] Opening a PSSM .pact file")
                dict_pssm = open_pact_file(self.directory +
                                           self.output_prefix + '_' + "PSSM")

                #Count our classifiers
                print("[Protocols:" + str_protocol_name +
                      "] PSSM Classifier Count")
                file_output.write("[Protocols:" + str_protocol_name +
                                  "] PSSM Classifier Count")

            #*****************************************
            #Pact Combine Section
            #*****************************************

            #Bound up front so the analysis section can tell if the merge ran
            dict_merged_datasets = None
            datasets_merged = False

            if self.dict_workflow['combinepact']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('combinepact'):
                    print(
                        "[Protocols:Homology Error] The combinepact config file is incorrect."
                    )
                    print(
                        "[Protocols:Homology Error] There is something wrong with the [combinepact] section."
                    )
                    quit()

                #Import our combinepact class
                try:
                    from pact.analysis.combine_pact import combine_pact
                except ImportError:
                    print(
                        "[Protocols:Homology Error] combine_pact was not found."
                    )
                    #Stop here: continuing would only fail on the unbound name
                    quit()

                #Create the object then call the merger
                obj_combine = combine_pact(self.obj_cfgparser,
                                           self.dict_programs, {})

                #The dict will be like {'dataset name': {data...
                dict_merged_datasets = obj_combine.combine_pact()
                datasets_merged = True

            #*****************************************
            #Analysis Section
            #*****************************************
            if self.dict_workflow['analysis_sitefitness_homology']:

                #The analysis consumes the merged datasets, so fail with a
                #clear message instead of an unbound-name error further down
                if not datasets_merged:
                    print(
                        "[Protocols:Homology Error] analysis_sitefitness_homology requires the combinepact workflow step."
                    )
                    quit()

                #Which dataset do we want?
                #NOTE(review): the PSSM section saves its file with the suffix
                #"PSSM" while "pssm" is opened here; confirm open_pact_file
                #tolerates the case difference on case-sensitive filesystems
                if self.obj_cfgparser.get('analysis_sitefitness_homology',
                                          'dataset_x') == "site_frequencies":
                    file_name = "freq"
                else:
                    file_name = "pssm"

                #Open our dict
                print("[Protocols:Homology] Opening our homology data")
                file_output.write(
                    "[Protocols:Homology] Opening our site freqs\n")
                dict_homology = open_pact_file(self.directory +
                                               self.output_prefix + '_' +
                                               file_name)

                #Plot our data
                if self.obj_cfgparser.get('analysis_sitefitness_homology',
                                          'scatter') == "True":
                    print("[Protocols:Homology] Plotting the figure")
                    file_output.write(
                        "[Protocols:Homology] Plotting the figure\n")
                    obj_homology.analysis_site_fit_homology_plot(
                        dict_homology, dict_merged_datasets, file_name)

                #Build our classifier table
                print("[Protocols:Homology] Making our classifier table")
                file_output.write(
                    "[Protocols:Homology] Making our classifier table\n")
                obj_homology.analysis_site_fit_homology_classifier(
                    dict_homology, dict_merged_datasets, file_name)

        return
Exemplo n.º 4
0
    def protocol(self):
        """Main entrypoint for the protocol"""

        #Create a output log file that we can append to
        with open(self.directory + self.output_prefix + "_" + strftime("%m_%d_%Y") + "-" +
                 strftime("%H_%M_%S") + '_output.txt', 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            """
            *****************************************
            Pact Combine (Required)
            *****************************************
            """
            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):           
                print("[Protocols:" + str_protocol_name + " Error] The combinepact config file is incorrect.")
                print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [combinepact] section.")
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print("[Protocols:" + str_protocol_name + " Error] combine_pact was not found.")

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()

            """
            *****************************************
            Classify our mutations
            *****************************************
            """
            #Build a dict_classified with [location][mutation] = "DEL/NEU/BEN/NONE"
            
            #Get the config file elements
            try:
                class_column = self.obj_cfgparser.get("variant_classification", "class_column").lower()
                class_threshold = float(self.obj_cfgparser.get("variant_classification", "class_threshold"))
            except NoOptionError:
                print("[Protocols:" + str_protocol_name + " Error] Missing [variant_classification] config file elements.")
                quit()
            except ValueError:
                print("[Protocols:" + str_protocol_name + " Error] Incorrect [variant_classification] config file elements.")
                quit()
            except TypeError:
                print("[Protocols:" + str_protocol_name + " Error] Incorrect [variant_classification] config file elements.")
                quit()

            #Make a dict to add our classifications into
            dict_classified = {}

            #Classify each dataset
            for dataset in dict_merged_datasets:

                #Add if not existing
                if dataset not in dict_classified:
                    dict_classified[dataset] = {}

                #Loop the locations
                for loc in dict_merged_datasets[dataset]:
                
                    #Add a new location if not in the dict
                    if loc not in dict_classified[dataset]:
                        dict_classified[dataset][loc] = {}

                    #Loop the muts
                    for mut in dict_merged_datasets[dataset][loc]:

                        #Skip WT, stop, and NaN
                        if (mut == dict_merged_datasets[dataset][loc][mut]['wt_residue'] or
                            mut == "*" or
                            dict_merged_datasets[dataset][loc][mut][class_column] == "NaN"):

                            dict_classified[dataset][loc][mut] = "UNCLASSIFIED"
                            continue

                        #Get the fitness value from the dataset
                        mut_value = float(dict_merged_datasets[dataset][loc][mut][class_column])

                        #Assign a classification of deleterious, slightly deleterious, or neutral
                        if mut_value <= (-1 * class_threshold):
                            dict_classified[dataset][loc][mut] = "DEL"

                        elif (mut_value > (-1 * class_threshold) and mut_value < class_threshold):
                            dict_classified[dataset][loc][mut] = "NEU"

                        elif mut_value >= class_threshold:
                            dict_classified[dataset][loc][mut] = "BEN"

            """
            *****************************************
            Count the basal classifiers
            *****************************************
            """
            if self.dict_workflow['basal_count']:

                #Import our class
                try:
                    from pact.analysis.basal_count import basal_count
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] pact.analysis.basal_count was not found.")
                    quit()
        
                #Create our object
                obj_basal = basal_count(self.obj_cfgparser, self.dict_programs, {})

                #Count our basal rates
                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset)
                    file_output.write("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset)

                    file_output.write(obj_basal.basal_count(dict_classified[dataset]) + "\n")

            """
            *****************************************
            DNA Filtering/Alignment or PSSM Object
            *****************************************
            """
            if (self.dict_workflow['blastp_align_filter'] or 
                self.dict_workflow['pssm'] or 
                self.dict_workflow['pssm_reader']):

                #Import Check Output
                from subprocess import check_output

                #Import our class
                try:
                    from pact.analysis.sequence.homology_pssm import homology_classifier
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] pact.analysis.sequence.homology_pssm was not found.")
                    quit()
        
                #Create our object
                obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #Count our classifiers
                print("[Protocols:" + str_protocol_name + "] Homology PSSM")
                file_output.write("[Protocols:" + str_protocol_name + "] Homology PSSM")

            """
            *****************************************
            DNA Filtering/Alignment
            *****************************************
            """
            if self.dict_workflow['blastp_align_filter']:
               
                #Convert our XML file
                print("[Protocols:" + str_protocol_name + "] xml_to_fasta")
                file_output.write("[Protocols:" + str_protocol_name + "] xml_to_fasta\n")
                obj_homology.xml_to_fasta()

                #Run CD-HIT on our new fasta file
                print("[Protocols:" + str_protocol_name + "] cdhit")
                file_output.write("[Protocols:" + str_protocol_name + "] cdhit\n")

                #Check to see if the number of processes is logical
                self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes")
                if int(self.processes) <= 0:
                    self.processes = "2"

                check_output([self.dict_programs['cdhit'],
                              "-i",
                              self.directory + self.output_prefix + ".fa",
                              "-o",
                              self.directory + self.output_prefix + ".afa",
                              "-c",
                              str(self.obj_cfgparser.get("blastp_align_filter", "cdhit_clustering_threshold")),
                              "-M",
                              "40000",
                              "-T",
                              str(self.processes)])

                #Check to see if we have WT in our cdhit output
                print("[Protocols:" + str_protocol_name + "] cdhit_wtcheck")
                file_output.write("[Protocols:" + str_protocol_name + "] cdhit_wtcheck\n")
                obj_homology.cdhit_wt_check()

                #Run MUSCLE on our new fasta file
                print("[Protocols:" + str_protocol_name + "] muscle")
                file_output.write("[Protocols:" + str_protocol_name + "] muscle\n")
                check_output([self.dict_programs['muscle'],
                              "-in",
                              self.directory + self.output_prefix + ".afa",
                              "-out",
                              self.directory + self.output_prefix + ".msa"])

                #Process our MSA (needs to be on for PSIBlast)
                print("[Protocols:" + str_protocol_name + "] processmsa")
                file_output.write("[Protocols:" + str_protocol_name + "] processmsa\n")
                list_msa = obj_homology.process_msa()

                #Save our list
                print("[Protocols:" + str_protocol_name + "] Saving our MSA")
                file_output.write("[Protocols:" + str_protocol_name + "] Saving our MSA\n")
                save_pact_file(list_msa, self.directory + self.output_prefix + '_' + "list_msa")

            """
            *****************************************
            PSSM
            *****************************************
            """
            if self.dict_workflow['pssm']:
                #Open our list
                print("[Protocols:" + str_protocol_name + "] Opening our MSA")
                file_output.write("[Protocols:" + str_protocol_name + "] Opening our MSA\n")
                list_msa = open_pact_file(self.directory + self.output_prefix + '_' + "list_msa")

                #Split our msa for PSIBlast (needs to be on for PSIBlast)
                print("[Protocols:" + str_protocol_name + "] msa_split")
                file_output.write("[Protocols:" + str_protocol_name + "] msa_split\n")
                list_pbcmds = obj_homology.msa_split(list_msa)

                #Run PSIBlast
                print("[Protocols:" + str_protocol_name + "] psiblast")
                file_output.write("[Protocols:" + str_protocol_name + "] psiblast\n")
                for command in list_pbcmds:
                    check_output([self.dict_programs['psiblast'], *command])

                #Import our PSSM data
                print("[Protocols:" + str_protocol_name + "] pssm_file_import")
                file_output.write("[Protocols:" + str_protocol_name + "] pssm_file_import\n")
                dict_pssm = obj_homology.pssm_file_import()

                #Save our heatmap
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv heatmap")
                file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n")

                #Save our csv
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv column data")
                file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n")

                #Save our PACT File
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .pact file")
                file_output.write(save_pact_file(dict_pssm, self.directory + self.output_prefix + '_' + "PSSM") + "\n")

            """
            *****************************************
            Read stored PSSM files
            *****************************************
            """
            if self.dict_workflow['pssm_reader'] or self.dict_workflow['wt_consensus']:
                #Open our PACT File
                print("[Protocols:" + str_protocol_name + "] Opening a PSSM .pact file")
                dict_pssm = open_pact_file(self.directory + self.output_prefix + '_' + "PSSM")

                #Count our classifiers
                print("[Protocols:" + str_protocol_name + "] PSSM Classifier Count")
                file_output.write("[Protocols:" + str_protocol_name + "] PSSM Classifier Count")
                

                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] PSSM Fitness Rates for dataset: " + dataset)
                    file_output.write("[Protocols:" + str_protocol_name + "] PSSM Fitness Rates for dataset: " + dataset)
                    file_output.write(obj_homology.classified_count_pssm(dict_pssm, dict_classified[dataset]) + "\n")

                    print("[Protocols:" + str_protocol_name + "] Wrote CSV of fitness values categorized by PSSM group and mutation type for dataset: " + dataset)
                    file_output.write("[Protocols:" + str_protocol_name + "] PSSM Fitness Rates for dataset: " + dataset)
                    file_output.write(obj_homology.classified_count_pssm_csv(dict_pssm, dict_classified[dataset], 
                                                                         dict_merged_datasets, dataset, class_column) + "\n")

            """
            *****************************************
            PDB Import Section
            *****************************************
            """
            #Only import and run if selected
            if self.dict_workflow['pdb_import']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('pdb_import'):           
                    print("[Protocols:" + str_protocol_name + " Error] The pdb_import config file is incorrect.")
                    print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [pdb_import] section.")
                    quit()

                #Import our combinepact class
                try:
                    from pact.analysis.pdb_import import pdb_import
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] pdb_import was not found.")

                #Create the object then call the merger
                obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #The dict will be like {'pdb name': {data...
                dict_pdb = obj_pdb.pdb_import()

            """
            *****************************************
            Back to Consensus Analyses
            *****************************************
            """
            if self.dict_workflow['consensus']:
                #Import our class
                try:
                    from pact.analysis.sequence.consensus import consensus
                except ImportError:
                    print("[Protocols:" + str_protocol_name + "] pact.analysis.basal_count was not found.")
                    quit()
        
                #Create our object
                obj_consensus = consensus(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #Get the wild-type sequence information
                dict_wtcons = obj_consensus.wt_consensus(dict_pssm)

                #Get the prob of finding a classified mutation
                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Mut Classification vs WT Cons for dataset: " + dataset)
                    obj_consensus.wtcons_count_class(dict_wtcons, dict_classified[dataset], dataset)

                #Get the prob of finding a classified mutation
                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Mutating a non-conserved site to a conserved site: " + dataset)
                    file_output.write(obj_consensus.nonconserved_sites(dict_wtcons, dict_pssm, dict_classified[dataset], dataset))

                #Get the prob of finding a classified mutation
                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Mutating a non-conserved site to a non-conserved mutation: " + dataset)
                    file_output.write(obj_consensus.nonconserved_mutations(dict_wtcons, dict_pssm, dict_classified[dataset], dataset))

                #Get the cross set distribution
                obj_consensus.cons_count_setvset(dict_wtcons, dict_pssm, dict_classified)

                if self.dict_workflow['pdb_import']:
                    for dataset in dict_classified:
                        print("[Protocols:" + str_protocol_name + 
                              "] Mutating a non-conserved site to a conserved site (Buried Residues Only): " + dataset)
                        file_output.write(obj_consensus.nonconserved_sites_burial(
                            dict_wtcons, dict_pssm, dict_classified[dataset], dataset, dict_pdb, "<"))

                    for dataset in dict_classified:
                        print("[Protocols:" + str_protocol_name + 
                              "] Mutating a non-conserved site to a conserved site (Surface Residues Only): " + dataset)
                        file_output.write(obj_consensus.nonconserved_sites_burial(
                            dict_wtcons, dict_pssm, dict_classified[dataset], dataset, dict_pdb, ">="))

        return
Exemplo n.º 5
0
    def protocol(self):
        """Main entrypoint for the Shannon entropy protocol.

        Opens a timestamped ``*_output.txt`` log file in ``self.directory``
        and writes ``self.pact_preamble`` to it, then merges the datasets
        with ``combine_pact`` and hands the merged dict to
        ``shannon_entropy_classifier.shannon_entropy``.

        Exits through ``quit()`` (``SystemExit``) when the ``[combinepact]``
        or ``[shannon_entropy]`` config section is missing, or when either
        pact analysis module cannot be imported.
        """

        #Create a output log file that we can append to
        str_logfile = (self.directory + self.output_prefix + "_" +
                       strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S") +
                       '_output.txt')

        with open(str_logfile, 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")
            """
            *****************************************
            Pact Combine (Required)
            *****************************************
            """
            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):
                print(
                    "[Protocols:Shannon Entropy Error] The combinepact config file is incorrect."
                )
                print(
                    "[Protocols:Shannon Entropy Error] There is something wrong with the [combinepact] section."
                )
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print(
                    "[Protocols:Shannon Entropy Error] combine_pact was not found."
                )
                #Stop here, otherwise the constructor call below only fails
                #later with an unrelated NameError
                quit()

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs,
                                       {})

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()
            """
            *****************************************
            Shannon Class Import
            *****************************************
            """
            #Check to see if the section is there (report the section that
            #is actually missing, not [combinepact])
            if not self.obj_cfgparser.has_section('shannon_entropy'):
                print(
                    "[Protocols:Shannon Entropy Error] The shannon_entropy config file is incorrect."
                )
                print(
                    "[Protocols:Shannon Entropy Error] There is something wrong with the [shannon_entropy] section."
                )
                quit()

            #Import our SE class
            try:
                from pact.analysis.sequence.shannon_entropy import shannon_entropy_classifier
            except ImportError:
                print(
                    "[Protocols:Shannon Entropy Error] Cannot load pact.analysis.sequence.shannon_entropy"
                )
                quit()

            #Create our object
            obj_se = shannon_entropy_classifier(self.obj_cfgparser,
                                                self.dict_programs,
                                                {'directory': self.directory})

            #Run our object (its return value is not used by this protocol)
            obj_se.shannon_entropy(dict_merged_datasets)

        return
Exemplo n.º 6
0
    def protocol(self):
        """Main entrypoint for the protocol.

        Runs, in order, every stage enabled in ``self.dict_workflow``:
        dataset merge, variant classification (always run), basal counts,
        homology/PSSM generation, stored-PSSM read, PDB import, consensus,
        residue chemical/size, distance to active site and contact number.
        Progress is printed and logged to a timestamped ``*_output.txt``
        file in ``self.directory``.

        Exits through ``quit()`` (``SystemExit``) on a missing config section
        or option, or when a required pact module cannot be imported.

        NOTE(review): an unconditional ``quit()`` sits in front of the
        "Output CSV and .pact" stage, so that stage never runs as written.
        It is kept here; confirm whether it is a debugging leftover.

        ``str_protocol_name``, ``NoOptionError``, ``save_pact_file`` and
        ``open_pact_file`` are taken from the enclosing module.
        """

        #Create a output log file that we can append to
        with open(self.directory + self.output_prefix + "_" + strftime("%m_%d_%Y") + "-" +
                 strftime("%H_%M_%S") + '_output.txt', 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            """
            *****************************************
            Pact Combine
            *****************************************
            """
            #Start empty so the classification stage still works when the
            #combinepact step is switched off
            dict_merged_datasets = {}

            if self.dict_workflow['combinepact']:
                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('combinepact'):
                    print("[Protocols:" + str_protocol_name + " Error] The combinepact config file is incorrect.")
                    print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [combinepact] section.")
                    quit()

                #Import our combinepact class
                try:
                    from pact.analysis.combine_pact import combine_pact
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] combine_pact was not found.")
                    #Stop here instead of failing on the next line with a NameError
                    quit()

                #Create the object then call the merger
                obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

                #The dict will be like {'dataset name': {data...
                dict_merged_datasets = obj_combine.combine_pact()

            """
            *****************************************
            Classify our mutations
            *****************************************
            """
            #Build a dict_classified with [dataset][location][mutation] = "DEL/NEU/BEN/UNCLASSIFIED"

            #Get the config file elements
            try:
                class_column = self.obj_cfgparser.get("variant_classification", "class_column").lower()
                class_threshold = float(self.obj_cfgparser.get("variant_classification", "class_threshold"))
            except NoOptionError:
                print("[Protocols:" + str_protocol_name + " Error] Missing [variant_classification] config file elements.")
                quit()
            except (ValueError, TypeError):
                print("[Protocols:" + str_protocol_name + " Error] Incorrect [variant_classification] config file elements.")
                quit()

            #Make a dict to add our classifications into
            dict_classified = {}

            #Classify each dataset
            for dataset in dict_merged_datasets:

                #Add if not existing
                if dataset not in dict_classified:
                    dict_classified[dataset] = {}

                #Loop the locations
                for loc in dict_merged_datasets[dataset]:

                    #Add a new location if not in the dict
                    if loc not in dict_classified[dataset]:
                        dict_classified[dataset][loc] = {}

                    #Loop the muts
                    for mut in dict_merged_datasets[dataset][loc]:

                        #Skip WT, stop, and NaN
                        if (mut == dict_merged_datasets[dataset][loc][mut]['wt_residue'] or
                            mut == "*" or
                            dict_merged_datasets[dataset][loc][mut][class_column] == "NaN"):

                            dict_classified[dataset][loc][mut] = "UNCLASSIFIED"
                            continue

                        #Get the fitness value from the dataset
                        mut_value = float(dict_merged_datasets[dataset][loc][mut][class_column])

                        #Assign a classification of deleterious, neutral, or beneficial
                        #using a band of +/- class_threshold around zero
                        if mut_value <= (-1 * class_threshold):
                            dict_classified[dataset][loc][mut] = "DEL"
                        elif (mut_value > (-1 * class_threshold) and mut_value < class_threshold):
                            dict_classified[dataset][loc][mut] = "NEU"
                        elif mut_value >= class_threshold:
                            dict_classified[dataset][loc][mut] = "BEN"

            """
            *****************************************
            Count the basal classifiers
            *****************************************
            """
            if self.dict_workflow['basal_count']:

                #Import our class
                try:
                    from pact.analysis.basal_count import basal_count
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] pact.analysis.basal_count was not found.")
                    quit()

                #Create our object
                obj_basal = basal_count(self.obj_cfgparser, self.dict_programs, {})

                #Count our basal rates
                for dataset in dict_classified:
                    print("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset)
                    file_output.write("[Protocols:" + str_protocol_name + "] Basal Fitness Counts for dataset: " + dataset)

                    file_output.write(obj_basal.basal_count(dict_classified[dataset]) + "\n")

            """
            *****************************************
            DNA Filtering/Alignment or PSSM Object
            *****************************************
            """
            if (self.dict_workflow['blastp_align_filter'] or
                self.dict_workflow['pssm'] or
                self.dict_workflow['pssm_reader']):

                #Import Check Output
                from subprocess import check_output

                #Import our class
                try:
                    from pact.analysis.sequence.homology_pssm import homology_classifier
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] pact.analysis.sequence.homology_pssm was not found.")
                    quit()

                #Create our object
                obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #Count our classifiers
                print("[Protocols:" + str_protocol_name + "] Homology PSSM")
                file_output.write("[Protocols:" + str_protocol_name + "] Homology PSSM")

            """
            *****************************************
            DNA Filtering/Alignment
            *****************************************
            """
            if self.dict_workflow['blastp_align_filter']:

                #Convert our XML file
                print("[Protocols:" + str_protocol_name + "] xml_to_fasta")
                file_output.write("[Protocols:" + str_protocol_name + "] xml_to_fasta\n")
                obj_homology.xml_to_fasta()

                #Run CD-HIT on our new fasta file
                print("[Protocols:" + str_protocol_name + "] cdhit")
                file_output.write("[Protocols:" + str_protocol_name + "] cdhit\n")

                #Check to see if the number of processes is logical
                self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes")
                if int(self.processes) <= 0:
                    self.processes = "2"

                check_output([self.dict_programs['cdhit'],
                              "-i",
                              self.directory + self.output_prefix + ".fa",
                              "-o",
                              self.directory + self.output_prefix + ".afa",
                              "-c",
                              str(self.obj_cfgparser.get("blastp_align_filter", "cdhit_clustering_threshold")),
                              "-M",
                              "40000",
                              "-T",
                              str(self.processes)])

                #Check to see if we have WT in our cdhit output
                print("[Protocols:" + str_protocol_name + "] cdhit_wtcheck")
                file_output.write("[Protocols:" + str_protocol_name + "] cdhit_wtcheck\n")
                obj_homology.cdhit_wt_check()

                #Run MUSCLE on our new fasta file
                print("[Protocols:" + str_protocol_name + "] muscle")
                file_output.write("[Protocols:" + str_protocol_name + "] muscle\n")
                check_output([self.dict_programs['muscle'],
                              "-in",
                              self.directory + self.output_prefix + ".afa",
                              "-out",
                              self.directory + self.output_prefix + ".msa"])

                #Process our MSA (needs to be on for PSIBlast)
                print("[Protocols:" + str_protocol_name + "] processmsa")
                file_output.write("[Protocols:" + str_protocol_name + "] processmsa\n")
                list_msa = obj_homology.process_msa()

                #Save our list
                print("[Protocols:" + str_protocol_name + "] Saving our MSA")
                file_output.write("[Protocols:" + str_protocol_name + "] Saving our MSA\n")
                save_pact_file(list_msa, self.directory + self.output_prefix + '_' + "list_msa")

            """
            *****************************************
            PSSM
            *****************************************
            """
            if self.dict_workflow['pssm']:
                #Open our list
                print("[Protocols:" + str_protocol_name + "] Opening our MSA")
                file_output.write("[Protocols:" + str_protocol_name + "] Opening our MSA\n")
                list_msa = open_pact_file(self.directory + self.output_prefix + '_' + "list_msa")

                #Split our msa for PSIBlast (needs to be on for PSIBlast)
                print("[Protocols:" + str_protocol_name + "] msa_split")
                file_output.write("[Protocols:" + str_protocol_name + "] msa_split\n")
                list_pbcmds = obj_homology.msa_split(list_msa)

                #Run PSIBlast
                print("[Protocols:" + str_protocol_name + "] psiblast")
                file_output.write("[Protocols:" + str_protocol_name + "] psiblast\n")
                for command in list_pbcmds:
                    check_output([self.dict_programs['psiblast'], *command])

                #Import our PSSM data
                print("[Protocols:" + str_protocol_name + "] pssm_file_import")
                file_output.write("[Protocols:" + str_protocol_name + "] pssm_file_import\n")
                dict_pssm = obj_homology.pssm_file_import()

                #Save our heatmap
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv heatmap")
                file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n")

                #Save our csv
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .csv column data")
                file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n")

                #Save our PACT File
                print("[Protocols:" + str_protocol_name + "] Saving a PSSM .pact file")
                file_output.write(save_pact_file(dict_pssm, self.directory + self.output_prefix + '_' + "PSSM") + "\n")

            """
            *****************************************
            Read stored PSSM files
            *****************************************
            """
            if (self.dict_workflow['pssm_reader'] or self.dict_workflow['consensus']):
                #Open our PACT File
                print("[Protocols:" + str_protocol_name + "] Opening a PSSM .pact file")
                dict_pssm = open_pact_file(self.directory + self.output_prefix + '_' + "PSSM")

            """
            *****************************************
            PDB Import Section
            *****************************************
            """
            #Only import and run if selected
            if self.dict_workflow['pdb_import']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('pdb_import'):
                    print("[Protocols:" + str_protocol_name + " Error] The pdb_import config file is incorrect.")
                    print("[Protocols:" + str_protocol_name + " Error] There is something wrong with the [pdb_import] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.pdb_import import pdb_import
                except ImportError:
                    print("[Protocols:" + str_protocol_name + " Error] pdb_import was not found.")
                    #Stop here instead of failing on the next line with a NameError
                    quit()

                #Create the object then call the merger
                obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #The dict will be like {'pdb name': {data...
                dict_pdb = obj_pdb.pdb_import()

            """
            *****************************************
            Back to Consensus Analyses
            *****************************************
            """
            if self.dict_workflow['consensus']:
                #Import our class
                try:
                    from pact.analysis.sequence.consensus import consensus
                except ImportError:
                    #Name the module that actually failed to import
                    print("[Protocols:" + str_protocol_name + "] pact.analysis.sequence.consensus was not found.")
                    quit()

                #Create our object
                obj_consensus = consensus(self.obj_cfgparser, self.dict_programs, {'directory':self.directory})

                #Get the wild-type sequence information
                dict_wtcons = obj_consensus.wt_consensus(dict_pssm)

            """
            *****************************************
            Residue Chemical/Size
            *****************************************
            """
            if self.dict_workflow['residue_chemical_size']:

                #Import our residue_chemical_size class
                try:
                    from pact.analysis.sequence.residue_chemical_size import residue_chemical_size
                except ImportError:
                    print("[Protocols:Enzyme Solubility Error] residue_chemical_size was not found.")
                    #Stop here instead of failing on the next line with a NameError
                    quit()

                #Create the object used for the per-mutation lookups below
                obj_rcs = residue_chemical_size(self.obj_cfgparser, self.dict_programs, {})

            """
            *****************************************
            Distance to Active Site
            *****************************************
            """
            #Only import and run if selected
            if self.dict_workflow['distance_to_active']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('distance_to_active'):
                    print("[Protocols:Enzyme Solubility Error] The distance_to_active config file is incorrect.")
                    print("[Protocols:Enzyme Solubility Error] There is something wrong with the [distance_to_active] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.structure.dist_to_active import dist_to_active
                except ImportError:
                    print("[Protocols:Enzyme Solubility] pact.analysis.structure.dist_to_active was not found.")
                    quit()

                #Create our object
                obj_dtoa = dist_to_active(self.obj_cfgparser, self.dict_programs, {})

                #Calculate the distance
                dict_dtoa_dist = obj_dtoa.dta_dist(dict_pdb)

            """
            *****************************************
            Contact Number
            *****************************************
            """
            #Only import and run if selected
            if self.dict_workflow['contact_number']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('contact_number'):
                    print("[Protocols:Enzyme Solubility Error] The contact_number config file is incorrect.")
                    print("[Protocols:Enzyme Solubility Error] There is something wrong with the [contact_number] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.structure.contact_number import contact_number
                except ImportError:
                    print("[Protocols:Enzyme Solubility] pact.analysis.structure.contact_number was not found.")
                    quit()

                #Create our object
                obj_contact = contact_number(self.obj_cfgparser, self.dict_programs, {})

                #Calculate the contact number
                dict_contact = obj_contact.contact_number(dict_pdb)

            """
            *****************************************
            Output CSV and .pact
            *****************************************
            """
            #NOTE(review): this unconditional quit() ends the protocol here, so
            #nothing below it runs. It looks like a debugging leftover, but it
            #is kept because removing it would change what the protocol does.
            quit()
            aa_table = 'ACDEFGHIKLMNPQRSTVWY'
            wtaa = self.obj_cfgparser.get('global', 'wtaa').upper()

            if self.dict_workflow['pdb_import']:
                chain = self.obj_cfgparser.get('classification_analysis', 'chain').upper()
                pdb_file = self.obj_cfgparser.get('classification_analysis', 'pdb_file')
                list_pdb_sites = sorted([x for x in dict_pdb[pdb_file]['dssp'][chain]])

            #Make a dict to work into
            dict_output = {}

            #Get the dataset name in order
            if self.obj_cfgparser.has_section("combinepact"):
                num_datasets = int(self.obj_cfgparser.get('combinepact', 'numdatasets'))
                list_datasets = [self.obj_cfgparser.get('combinepact', 'dataset_' + str(int_dataset))
                                 for int_dataset in range(1, num_datasets + 1)]
            else:
                list_datasets = []

            #Get the header
            str_output = ','.join([
                "Location",
                "Mutation",
                ','.join([dataset + "_fitness" for dataset in list_datasets]),
                ','.join([dataset + "_sd_from_wt" for dataset in list_datasets]),
                ','.join([dataset + "_classified" for dataset in list_datasets]),
                "wt_resi",
                "wt_pssm",
                "wt_percent",
                "max_pssm",
                "max_percent",
                "pssm_cons_count",
                "percent_cons_count",
                "wt_max_pssm",
                "wt_max_percent",
                "mut_pssm",
                "mut_percent",
                "frac_burial",
                "polarity",
                "aromatics",
                "philic_phobic",
                "size",
                "hydropathy",
                "dist_to_active",
                "contact_number"
                ]) + "\n"

            #Loop the locations (1-based positions along the WT sequence)
            for loc in range(1, len(wtaa) + 1):

                #Add to dict if not already added
                if loc not in dict_output:
                    dict_output[loc] = {}

                #Loop the mutations
                for mut in aa_table:

                    #Add to dict if not already added
                    if mut not in dict_output[loc]:
                        dict_output[loc][mut] = {}

                    #Get the location
                    str_output = str_output + str(loc) + ','

                    #Get the mutation
                    str_output = str_output + mut + ','

                    #Get the datasets, test if loc is in there
                    str_output = str_output + ','.join([str(dict_merged_datasets[dataset][loc][mut]['fitness'])
                                                        if loc in dict_merged_datasets[dataset] else " "
                                                        for dataset in list_datasets
                                                        ]) + ','

                    str_output = str_output + ','.join([str(dict_merged_datasets[dataset][loc][mut]['sd_from_wt'])
                                                        if loc in dict_merged_datasets[dataset] else " "
                                                        for dataset in list_datasets
                                                        ]) + ','

                    #Get the classified
                    str_output = str_output + ','.join([str(dict_classified[dataset][loc][mut])
                                                        if loc in dict_classified[dataset] else " "
                                                        for dataset in list_datasets
                                                        ]) + ','

                    #Get the WT Consensus Data
                    if self.dict_workflow['consensus']:
                        str_output = str_output + ','.join(map(str, [
                            dict_wtcons[loc]['wt_resi'],
                            dict_wtcons[loc]['wt_pssm'],
                            dict_wtcons[loc]['wt_percent'],
                            dict_wtcons[loc]['max_pssm'],
                            dict_wtcons[loc]['max_percent'],
                            dict_wtcons[loc]['pssm_cons_count'],
                            dict_wtcons[loc]['percent_cons_count'],
                            dict_wtcons[loc]['wt_max_pssm'],
                            dict_wtcons[loc]['wt_max_percent'],
                            ])) + ','
                    else:
                        str_output = str_output + "NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,"

                    #Get the PSSM data
                    if self.dict_workflow['pssm_reader']:
                        str_output = str_output + dict_pssm[loc][mut][0] + ","
                        str_output = str_output + dict_pssm[loc][mut][1] + ","
                    else:
                        str_output = str_output + "NaN,NaN,"

                    #Get the fraction burial
                    if self.dict_workflow['pdb_import']:
                        if loc in list_pdb_sites:
                            str_output = str_output + str(dict_pdb[pdb_file]['dssp'][chain][loc]['frac_burial']) + ","
                        else:
                            str_output = str_output + "NaN,"
                    else:
                        str_output = str_output + "NaN,"

                    #Residue chemical/size change from the WT residue to the mutation
                    if self.dict_workflow['residue_chemical_size']:
                        dict_rcs_mut = obj_rcs.mut_info(wtaa[loc - 1], mut)
                        str_output = str_output + ','.join([dict_rcs_mut['polarity'],
                                                            dict_rcs_mut['aromatics'],
                                                            dict_rcs_mut['philic_phobic'],
                                                            dict_rcs_mut['size'],
                                                            str(dict_rcs_mut['hydropathy']),
                                                            ]) + ','
                    else:
                        str_output = str_output + "NaN,NaN,NaN,NaN,NaN,"

                    #Dist to active site
                    if self.dict_workflow['distance_to_active']:
                        str_output = str_output + str(min(dict_dtoa_dist[chain][loc])) + ','
                    else:
                        str_output = str_output + "NaN,"

                    #Contact number
                    if self.dict_workflow['contact_number']:
                        str_output = str_output + str(len(dict_contact[chain][loc])) + ','
                    else:
                        str_output = str_output + "NaN,"

                    #Newline
                    str_output = str_output + '\n'

            #Output a csv file (separate handle name so the log file handle
            #opened at the top is not shadowed)
            with open(self.directory + self.output_prefix + '_dataset.csv', 'w') as file_csv:
                file_csv.write(str_output)

            #At this point it's easier to backcalculate the csv file
            list_output = str_output.splitlines()
            list_keys = list_output[0].rstrip('\n').split(',')
            columns = len(list_keys)

            #Parse the lines
            for line in list_output[1:]:

                #Split the line
                splitline = line.split(',')

                #Parse each column (the first two are the location and mutation keys)
                for i in range(2, columns):
                    dict_output[int(splitline[0])][splitline[1]][list_keys[i]] = splitline[i]

            #Output a pact file
            print(save_pact_file(dict_output, self.directory + self.output_prefix + '_dataset'))

        return
Exemplo n.º 7
0
    def protocol(self):
        """Provide a protocol that does general analyses that don't need a full protocol.

        Opens a timestamped ``*_output.txt`` log file under ``self.directory``,
        writes ``self.pact_preamble`` to it, merges the datasets through
        ``combine_pact`` and then runs every optional analysis that is switched
        on in ``self.dict_workflow`` (``aa_compare_ttest``, ``threshold_count``).
        Whatever each analysis returns is appended to the log file.

        The method terminates the interpreter through ``quit()`` when a required
        config section is missing or when an analysis module cannot be imported.

        Returns:
            None
        """

        #Create a output log file that we can append to
        #(a single strftime call keeps the date and time parts consistent)
        log_path = (self.directory + self.output_prefix + "_" +
                    strftime("%m_%d_%Y-%H_%M_%S") + '_output.txt')

        with open(log_path, 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            #*****************************************
            #Pact Combine (Required)
            #*****************************************

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):
                print("[Protocols:Analysis Error] The combinepact config file is incorrect.")
                print("[Protocols:Analysis Error] There is something wrong with the [combinepact] section.")
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print("[Protocols:Analysis Error] combine_pact was not found.")

                #Stop here, otherwise the next line fails on an unbound name
                quit()

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

            #Print Section Progress
            print("[Protocols:Analysis] Combine PACT")

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()

            #*****************************************
            #T-Test of Two Groups
            #*****************************************
            if self.dict_workflow['aa_compare_ttest']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('aa_compare_ttest'):
                    print("[Protocols:Analysis Error] The aa_compare_ttest config file is incorrect.")
                    print("[Protocols:Analysis Error] There is something wrong with the [aa_compare_ttest] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.sequence.aa_fitmet_compare import aa_fitmet_compare
                except ImportError:
                    print("[Protocols:Analysis Error] aa_fitmet_compare was not found.")

                    #Stop here, otherwise the next line fails on an unbound name
                    quit()

                #Create the object then call the analysis
                obj_aac = aa_fitmet_compare(self.obj_cfgparser, self.dict_programs,
                                            {'directory':self.directory})

                #Run the main routine
                print("[Protocols:Analysis] T-Test of amino acid groups")
                file_output.write(obj_aac.aa_fitmet_compare(dict_merged_datasets))

            #*****************************************
            #Count our mutations
            #*****************************************
            if self.dict_workflow['threshold_count']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('threshold_count'):
                    print("[Protocols:Analysis Error] The threshold_count config file is incorrect.")
                    print("[Protocols:Analysis Error] There is something wrong with the [threshold_count] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.sequence.threshold_count import threshold_count
                except ImportError:
                    print("[Protocols:Analysis Error] threshold_count was not found.")

                    #Stop here, otherwise the next line fails on an unbound name
                    quit()

                #Create the object then call the analysis
                obj_tc = threshold_count(self.obj_cfgparser, self.dict_programs,
                                         {'directory':self.directory})

                #Count them
                print("[Protocols:Analysis] Count of mutations above and below a cutoff")
                file_output.write(obj_tc.threshold_count(dict_merged_datasets))
Exemplo n.º 8
0
    def protocol(self):
        """Main entrypoint for the enzyme solubility protocol.

        Opens a timestamped ``*_output.txt`` log file under ``self.directory``
        and then, in order:

        1. Merges the datasets through ``combine_pact`` (required).
        2. Reads the ``[enzyme_solubility]`` options and builds two
           ``{location: {mutation: label}}`` dicts: ``dict_basal`` (labelled
           from the fitness dataset only) and ``dict_classified`` (the same
           label, but ``"UNCLASSIFIED"`` when the mutation is below
           ``screen_threshold`` in the screen dataset).
        3. Runs each optional stage enabled in ``self.dict_workflow``:
           ``basal_count``, ``blastp_align_filter``, ``pssm``,
           ``residue_chemical_size``, ``pdb_import``, ``distance_to_active``,
           ``contact_number`` and ``strict_filter``, writing each report to the
           log file.

        The method terminates the interpreter through ``quit()`` when a config
        section/option is missing or malformed, or when an analysis module
        cannot be imported.

        Returns:
            None
        """

        #Imported locally so a missing [enzyme_solubility] section can be reported
        from configparser import NoSectionError

        #Create a output log file that we can append to
        #(a single strftime call keeps the date and time parts consistent)
        log_path = (self.directory + self.output_prefix + "_" +
                    strftime("%m_%d_%Y-%H_%M_%S") + '_output.txt')

        with open(log_path, 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")

            #*****************************************
            #Pact Combine (Required)
            #*****************************************

            #Check to see if the section is there
            if not self.obj_cfgparser.has_section('combinepact'):
                print("[Protocols:Enzyme Solubility Error] The combinepact config file is incorrect.")
                print("[Protocols:Enzyme Solubility Error] There is something wrong with the [combinepact] section.")
                quit()

            #Import our combinepact class
            try:
                from pact.analysis.combine_pact import combine_pact
            except ImportError:
                print("[Protocols:Enzyme Solubility Error] combine_pact was not found.")

                #Stop here, otherwise the next line fails on an unbound name
                quit()

            #Create the object then call the merger
            obj_combine = combine_pact(self.obj_cfgparser, self.dict_programs, {})

            #The dict will be like {'dataset name': {data...
            dict_merged_datasets = obj_combine.combine_pact()

            #*****************************************
            #Classify our mutations
            #*****************************************
            #Build a dict_classified with [location][mutation] = "DEL/SLIGHTDEL/NEU/NONE"
            #0.15 in GFP, 80% of WT = neutral, 50% of WT = slightly, <50% of WT = deleterious

            #Get the config file elements
            try:
                screen_dataset = self.obj_cfgparser.get("enzyme_solubility", "dataset_screen")
                screen_threshold = float(self.obj_cfgparser.get("enzyme_solubility", "screen_threshold"))
                fitness_dataset = self.obj_cfgparser.get("enzyme_solubility", "dataset_fitness")
                fitness_neutral = float(self.obj_cfgparser.get("enzyme_solubility", "fitness_neu"))
                fitness_slightdel = float(self.obj_cfgparser.get("enzyme_solubility", "fitness_slightdel"))
            except (NoSectionError, NoOptionError):
                print("[Enzyme Solubility Error] Missing [enzyme_solubility] config file elements.")
                quit()
            except (ValueError, TypeError):
                print("[Enzyme Solubility Error] Incorrect [enzyme_solubility] config file elements.")
                quit()

            #Make the dicts to add our classifications into
            dict_classified = {}
            dict_basal = {}

            #Shorthand for the two datasets we read from
            dict_screen = dict_merged_datasets[screen_dataset]
            dict_fitness = dict_merged_datasets[fitness_dataset]

            #Loop the locations
            for loc in dict_screen:

                #Every screened location gets an entry in both dicts
                dict_classified.setdefault(loc, {})
                dict_basal.setdefault(loc, {})

                #Loop the muts
                for mut in dict_screen[loc]:

                    #Skip WT, stop, and NaN in the fitness dataset
                    if (mut == dict_fitness[loc][mut]['wt_residue'] or
                        mut == "*" or
                        dict_fitness[loc][mut]['fitness'] == "NaN"):

                        #Only the basal dict is labelled here, the classified dict gets no entry
                        dict_basal[loc][mut] = "UNCLASSIFIED"
                        continue

                    #Get the fitness value from the fitness dataset
                    fitness_value = float(dict_fitness[loc][mut]['fitness'])

                    #Assign a classification of deleterious, slightly deleterious, or neutral
                    #(stays None when no comparison holds, e.g. a float nan)
                    label = None
                    if fitness_value < fitness_slightdel:
                        label = "DEL"
                    elif (fitness_value >= fitness_slightdel and fitness_value < fitness_neutral):
                        label = "SLIGHTDEL"
                    elif fitness_value >= fitness_neutral:
                        label = "NEU"

                    #For the basal screen fitness
                    if label is not None:
                        dict_basal[loc][mut] = label

                    #Skip WT, stop, and NaN in the screen dataset
                    if (mut == dict_screen[loc][mut]['wt_residue'] or
                        mut == "*" or
                        dict_screen[loc][mut]['fitness'] == "NaN"):

                        dict_classified[loc][mut] = "UNCLASSIFIED"
                        continue

                    #Are we are enriched in the screen dataset?
                    if float(dict_screen[loc][mut]['fitness']) < screen_threshold:
                        dict_classified[loc][mut] = "UNCLASSIFIED"
                        continue

                    #Same classification as the basal one for mutations passing the screen
                    if label is not None:
                        dict_classified[loc][mut] = label

            #*****************************************
            #Count the basal classifiers
            #*****************************************
            if self.dict_workflow['basal_count']:

                #Import our class
                try:
                    from pact.analysis.basal_count import basal_count
                except ImportError:
                    print("[Protocols:Enzyme Solubility] pact.analysis.basal_count was not found.")
                    quit()

                #Create our object
                obj_basal = basal_count(self.obj_cfgparser, self.dict_programs, {})

                #Count our basal rates
                print("[Protocols:Enzyme Solubility] Basal Screen Counts")
                file_output.write("[Protocols:Enzyme Solubility] Basal Screen Counts")
                file_output.write(obj_basal.basal_count(dict_basal) + "\n")

                print("[Protocols:Enzyme Solubility] Basal Fitness Counts")
                file_output.write("[Protocols:Enzyme Solubility] Basal Fitness Counts")
                file_output.write(obj_basal.basal_count(dict_classified) + "\n")

            #*****************************************
            #DNA Filtering/Alignment or PSSM Object
            #*****************************************
            #strict_filter is included because the PSSM section below also runs
            #for it and needs both obj_homology and check_output
            if (self.dict_workflow['blastp_align_filter'] or
                self.dict_workflow['pssm'] or
                self.dict_workflow['strict_filter']):

                #Import Check Output
                from subprocess import check_output

                #Import our class
                try:
                    from pact.analysis.sequence.homology_pssm import homology_classifier
                except ImportError:
                    print("[Protocols:Enzyme Solubility] pact.analysis.sequence.homology_pssm was not found.")
                    quit()

                #Create our object
                obj_homology = homology_classifier(self.obj_cfgparser, self.dict_programs,
                                                   {'directory':self.directory})

            #*****************************************
            #DNA Filtering/Alignment
            #*****************************************
            if self.dict_workflow['blastp_align_filter']:

                #Convert our XML file
                print("[Protocols:Enzyme Solubility] xml_to_fasta")
                file_output.write("[Protocols:Enzyme Solubility] xml_to_fasta\n")
                obj_homology.xml_to_fasta()

                #Run CD-HIT on our new fasta file
                print("[Protocols:Enzyme Solubility] cdhit")
                file_output.write("[Protocols:Enzyme Solubility] cdhit\n")

                #Check to see if the number of processes is logical
                self.processes = self.obj_cfgparser.get("blastp_align_filter", "processes")
                if int(self.processes) <= 0:
                    self.processes = "2"

                check_output([self.dict_programs['cdhit'],
                              "-i",
                              self.directory + self.output_prefix + ".fa",
                              "-o",
                              self.directory + self.output_prefix + ".afa",
                              "-c",
                              str(self.obj_cfgparser.get("blastp_align_filter", "cdhit_clustering_threshold")),
                              "-M",
                              "40000",
                              "-T",
                              str(self.processes)])

                #Check to see if we have WT in our cdhit output
                print("[Protocols:Enzyme Solubility] cdhit_wtcheck")
                file_output.write("[Protocols:Enzyme Solubility] cdhit_wtcheck\n")
                obj_homology.cdhit_wt_check()

                #Run MUSCLE on our new fasta file
                print("[Protocols:Enzyme Solubility] muscle")
                file_output.write("[Protocols:Enzyme Solubility] muscle\n")
                check_output([self.dict_programs['muscle'],
                              "-in",
                              self.directory + self.output_prefix + ".afa",
                              "-out",
                              self.directory + self.output_prefix + ".msa"])

                #Process our MSA (needs to be on for PSIBlast)
                print("[Protocols:Enzyme Solubility] processmsa")
                file_output.write("[Protocols:Enzyme Solubility] processmsa\n")
                list_msa = obj_homology.process_msa()

                #Save our list
                print("[Protocols:Enzyme Solubility] Saving our MSA")
                file_output.write("[Protocols:Enzyme Solubility] Saving our MSA\n")
                obj_homology.save_data_structure(list_msa, "list_msa")

            #*****************************************
            #PSSM
            #*****************************************
            if self.dict_workflow['pssm'] or self.dict_workflow['strict_filter']:

                #Open our list (reloaded from the saved structure, not reused from above)
                print("[Protocols:Enzyme Solubility] Opening our MSA")
                file_output.write("[Protocols:Enzyme Solubility] Opening our MSA\n")
                list_msa = obj_homology.open_data_structure("list_msa")

                #Split our msa for PSIBlast (needs to be on for PSIBlast)
                print("[Protocols:Enzyme Solubility] msa_split")
                file_output.write("[Protocols:Enzyme Solubility] msa_split\n")
                list_pbcmds = obj_homology.msa_split(list_msa)

                #Run PSIBlast
                print("[Protocols:Enzyme Solubility] psiblast")
                file_output.write("[Protocols:Enzyme Solubility] psiblast\n")
                for command in list_pbcmds:
                    check_output([self.dict_programs['psiblast'], *command])

                #Import our PSSM data
                print("[Protocols:Enzyme Solubility] pssm_file_import")
                file_output.write("[Protocols:Enzyme Solubility] pssm_file_import\n")
                dict_pssm = obj_homology.pssm_file_import()

                #Save our heatmap
                print("[Protocols:Enzyme Solubility] Saving a PSSM .csv heatmap")
                file_output.write(obj_homology.pssm_output_heat(dict_pssm) + "\n")

                #Save our csv
                print("[Protocols:Enzyme Solubility] Saving a PSSM .csv column data")
                file_output.write(obj_homology.pssm_output_csv(dict_pssm) + "\n")

                #Save our PACT File
                print("[Protocols:Enzyme Solubility] Saving a PSSM .pact file")
                file_output.write(obj_homology.save_data_structure(dict_pssm, "PSSM") + "\n")

                #Count our classifiers
                print("[Protocols:Enzyme Solubility] PSSM")
                file_output.write("[Protocols:Enzyme Solubility] PSSM")

                print("Fitness Rates:")
                file_output.write("Fitness Rates:")
                file_output.write(obj_homology.classified_count_pssm(dict_pssm, dict_basal) + "\n")

                print("\nScreen Rates:")
                file_output.write("\nScreen Rates:")
                file_output.write(obj_homology.classified_count_pssm(dict_pssm, dict_classified) + "\n")

            #*****************************************
            #Residue Chemical/Size
            #*****************************************
            if self.dict_workflow['residue_chemical_size']:

                #Import our residue_chemical_size class
                try:
                    from pact.analysis.sequence.residue_chemical_size import residue_chemical_size
                except ImportError:
                    print("[Protocols:Enzyme Solubility Error] residue_chemical_size was not found.")

                    #Stop here, otherwise the next line fails on an unbound name
                    quit()

                #Create the object
                obj_rcs = residue_chemical_size(self.obj_cfgparser, self.dict_programs, {})

                #Return the process dict {1: {'A': {''
                dict_rcs = obj_rcs.process_dataset(dict_merged_datasets)

                #Count our classifiers
                print("[Protocols:Enzyme Solubility] Residue Chemical/Size")
                file_output.write("[Protocols:Enzyme Solubility] Residue Chemical/Size")

                print("Fitness Rates:")
                file_output.write("Fitness Rates:")
                file_output.write(obj_rcs.classified_count(dict_rcs, fitness_dataset, dict_basal) + "\n")

                print("\nScreen Rates:")
                file_output.write("\nScreen Rates:")
                file_output.write(obj_rcs.classified_count(dict_rcs, screen_dataset, dict_classified) + "\n")

            #*****************************************
            #PDB Import Section
            #*****************************************
            #Only import and run if selected (the structural stages below need dict_pdb)
            if (self.dict_workflow['pdb_import'] or
                self.dict_workflow['distance_to_active'] or
                self.dict_workflow['contact_number'] or
                self.dict_workflow['strict_filter']):

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('pdb_import'):
                    print("[Protocols:Enzyme Solubility Error] The pdb_import config file is incorrect.")
                    print("[Protocols:Enzyme Solubility Error] There is something wrong with the [pdb_import] section.")
                    quit()

                #Import our pdb_import class
                try:
                    from pact.analysis.pdb_import import pdb_import
                except ImportError:
                    print("[Protocols:Enzyme Solubility Error] pdb_import was not found.")

                    #Stop here, otherwise the next line fails on an unbound name
                    quit()

                #Create the object then call the importer
                obj_pdb = pdb_import(self.obj_cfgparser, self.dict_programs,
                                     {'directory':self.directory})

                #The dict will be like {'pdb name': {data...
                dict_pdb = obj_pdb.pdb_import()

            #*****************************************
            #Distance to Active Site
            #*****************************************
            #Only import and run if selected
            if self.dict_workflow['distance_to_active'] or self.dict_workflow['strict_filter']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('distance_to_active'):
                    print("[Protocols:Enzyme Solubility Error] The distance_to_active config file is incorrect.")
                    print("[Protocols:Enzyme Solubility Error] There is something wrong with the [distance_to_active] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.structure.dist_to_active import dist_to_active
                except ImportError:
                    print("[Protocols:Enzyme Solubility] pact.analysis.structure.dist_to_active was not found.")
                    quit()

                #Create our object
                obj_dtoa = dist_to_active(self.obj_cfgparser, self.dict_programs, {})

                #Calculate the distance
                dict_dtoa_dist = obj_dtoa.dta_dist(dict_pdb)

                #Count our classifiers
                print("[Protocols:Enzyme Solubility] Distance to Active Site")
                file_output.write("[Protocols:Enzyme Solubility] Distance to Active Site")

                print("Fitness Rates:")
                file_output.write("Fitness Rates:")
                file_output.write(obj_dtoa.classified_count(dict_dtoa_dist, dict_basal) + "\n")

                print("\nScreen Rates:")
                file_output.write("\nScreen Rates:")
                file_output.write(obj_dtoa.classified_count(dict_dtoa_dist, dict_classified) + "\n")

            #*****************************************
            #Contact Number
            #*****************************************
            #Only import and run if selected
            if self.dict_workflow['contact_number'] or self.dict_workflow['strict_filter']:

                #Check to see if the section is there
                if not self.obj_cfgparser.has_section('contact_number'):
                    print("[Protocols:Enzyme Solubility Error] The contact_number config file is incorrect.")
                    print("[Protocols:Enzyme Solubility Error] There is something wrong with the [contact_number] section.")
                    quit()

                #Import our class
                try:
                    from pact.analysis.structure.contact_number import contact_number
                except ImportError:
                    print("[Protocols:Enzyme Solubility] pact.analysis.structure.contact_number was not found.")
                    quit()

                #Create our object
                obj_contact = contact_number(self.obj_cfgparser, self.dict_programs, {})

                #Calculate the contact number
                dict_contact = obj_contact.contact_number(dict_pdb)

                #Count our classifiers
                print("[Protocols:Enzyme Solubility] Contact Number")
                file_output.write("[Protocols:Enzyme Solubility] Contact Number")

                print("Fitness Rates:")
                file_output.write("Fitness Rates:")
                file_output.write(obj_contact.classified_count(dict_contact, dict_basal) + "\n")

                print("\nScreen Rates:")
                file_output.write("\nScreen Rates:")
                file_output.write(obj_contact.classified_count(dict_contact, dict_classified) + "\n")

            #*****************************************
            #Strict Enzyme Filter
            #*****************************************
            #Only import and run if selected
            if self.dict_workflow['strict_filter']:

                print("[Protocols:Enzyme Solubility] Strict Enzyme Filter")
                file_output.write("[Protocols:Enzyme Solubility] Strict Enzyme Filter")

                #Check if the dicts exist
                if 'dict_pssm' not in locals():
                    print("[Protocols:Enzyme Solubility] Missing PSSM Data")
                    file_output.write("[Protocols:Enzyme Solubility] Missing PSSM Data")
                    quit()

                if 'dict_contact' not in locals():
                    print("[Protocols:Enzyme Solubility] Missing Contact Number Data")
                    file_output.write("[Protocols:Enzyme Solubility] Missing Contact Number Data")
                    quit()

                if 'dict_dtoa_dist' not in locals():
                    print("[Protocols:Enzyme Solubility] Missing Active Site Distance Data")
                    file_output.write("[Protocols:Enzyme Solubility] Missing Active Site Distance Data")
                    quit()

                #Implement a filter that
                #PSSM >= 0
                #Distance to Active >= 15A
                #Contact Number <= 16
                #No proline mutations

                #NOTE(review): dict_contact and dict_dtoa_dist are indexed directly by
                #location here; confirm they are not keyed by chain first.

                #Create a list to work into
                list_strictfilter = []

                #Loop the locations
                for loc in dict_classified:

                    #Loop the mutations
                    for mut in dict_classified[loc]:

                        #Skip PRO, and Stop
                        if (mut == "P" or
                            mut == "*" or
                            dict_merged_datasets[screen_dataset][loc][mut]['wt_residue'] == "P"):
                            continue

                        #Check if PSSM is less than 0
                        if int(dict_pssm[loc][mut][0]) < 0:
                            continue

                        #Skip residues without location data
                        if loc not in dict_contact or loc not in dict_dtoa_dist:
                            continue

                        #Check if Distance to Active is less than 15A
                        if min(dict_dtoa_dist[loc]) < 15:
                            continue

                        #Check if the contact number is greater than 16
                        if len(dict_contact[loc]) > 16:
                            continue

                        #Otherwise, add to our list
                        list_strictfilter.append(dict_classified[loc][mut])

                #Report
                str_return = '\n'.join(map(str, [
                    "Enzyme Strict Filter",
                    pretty_counter_dicts(dict(Counter(list_strictfilter)))
                    ]))

                print(str_return)
                file_output.write(str_return)