Example No. 1
    def cdhit_wt_check(self):
        """Checks to see if WT is still there after CDHIT"""

        #Check inputs for valid values
        file_checker(self.directory + self.proteinname + ".afa")

        #Flag to see if our WT is in the input
        bool_wt_in_ncbi = False

        #Open the file and check it line by line
        with open(self.directory + self.proteinname + ".afa", 'r') as infile:
            for line in infile:

                #Check to see if the header matches the wild-type entry
                if line.rstrip('\r\n') == '>' + self.proteinname:
                    bool_wt_in_ncbi = True

        #If WT was not in the NCBI input then add it
        if not bool_wt_in_ncbi:
            with open(self.directory + self.proteinname + ".afa", 'a') as outfile:
                outfile.write('>' + self.proteinname + '\n')
                outfile.write(self.wtaa + '\n')

        return
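
The file_checker helper used throughout these examples is not shown here; below is a minimal sketch of what it might look like, assuming it simply reports whether a path exists (the real helper may instead abort on a missing file).

#Hypothetical stand-in for the file_checker helper used in these examples
from os.path import isfile

def file_checker(filepath):
    """Return True if the file exists, otherwise warn and return False."""
    if isfile(filepath):
        return True
    print("[File Error] Cannot find: " + filepath)
    return False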
Example No. 2
    def __init__(self, obj_config, dict_programs, dict_protocolconfig):
        """Initialize the class varibles"""
        
        #Get the config file parser object from the calling class
        self.config_file = obj_config

        #Get the PACT dict programs
        self.dict_programs = dict_programs

        #Get the extra config options assigned by the protocol
        self.dict_protocolconfig = dict_protocolconfig

        #Check if the config file section is defined
        if not self.config_file.has_section("pdb_import"):
            print("[PDB Import Error] The config file is missing the section for PDB Import")
            quit()

        #Set the number of pdb files
        try:
            self.numfiles = int(self.config_file.get('pdb_import', 'numpdb'))
        except ValueError:
            print("[PDB Import Error] numpdb is not set properly in the config file.")
            quit()
        except NoOptionError:
            print("[PDB Import Error] The pdb_import config section is incorrect.")
            print("[PDB Import Error] Missing: numpdb")
            quit()

        #Get the pdb names
        self.dict_pdbs = {}

        #Validate each pdb file and store the full filename
        try:
            #Loop each file
            for i in range(1, self.numfiles + 1):
                #Get the pdb filename
                file_pdb = self.config_file.get('pdb_import', 'file_' + str(i))

                #See if the file exists, then store the validated full path
                if file_checker(self.dict_protocolconfig['directory'] + file_pdb):
                    self.dict_pdbs[file_pdb] = {'filename': self.dict_protocolconfig['directory'] + file_pdb}
        except NoOptionError:
            print("[PDB Import Error] The pdb_import config file is incorrect.")
            print("[PDB Import Error] Missing: file_#")
            quit()
        
        return
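
Below is a sketch of the [pdb_import] config section that this constructor reads. The PDB file names are hypothetical; building the section with Python's ConfigParser shows that the option names line up with the get() calls above.

#Hypothetical [pdb_import] section matching the get() calls above
from configparser import ConfigParser

obj_config = ConfigParser()
obj_config.read_string("""
[pdb_import]
numpdb = 2
file_1 = model_a.pdb
file_2 = model_b.pdb
""")

print(obj_config.get('pdb_import', 'numpdb'))   # '2'
print(obj_config.get('pdb_import', 'file_1'))   # 'model_a.pdb'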
Example No. 3
    def __init__(self, settings_dict):
        """Initialize the class varibles"""

        #Assign our varibles to the class
        self.wtaa = settings_dict['WTAA']
        self.FirstAAMutated = int(settings_dict['FirstAAMutated']) - 1

        #Check to see if the number of processes is logical
        #(test for None first so the comparison below cannot fail)
        if settings_dict['Processes'] is None or settings_dict['Processes'] <= 0:
            self.processes = cpu_count()
        else:
            self.processes = settings_dict['Processes']

        #Check to see if the input file exists
        if file_checker(settings_dict['In_File']):
            self.in_file = settings_dict['In_File']

        #Setup the output prefix
        self.out_prefix = settings_dict['Out_Prefix']

        #Check to see if the mut count threshold is logical
        if int(settings_dict['MutThreshold']) < 1:
            self.mutcountthreshold = 1
        else:
            self.mutcountthreshold = int(settings_dict['MutThreshold'])

        #Import the mutation design list
        self.list_mutation_design = []
        for group in literal_eval(settings_dict['mutcodons']):
            #If we have n between two numbers
            if 'n' in group:
                #We need exactly three to expand the range
                if len(group) == 3:
                    #Get the start and end position, then iterate through the two points
                    self.list_mutation_design.append(
                        list(i for i in range(group[0], group[-1] + 1)))
                else:
                    print(
                        "[Error] The codon list is missing either a start or end point around the 'n' location."
                    )
                    exit(1)
            else:
                #Add the other ranges without changing them
                self.list_mutation_design.append(group)
        return
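
Since mutcodons is parsed with literal_eval, it arrives as a Python-literal string. The small worked example below uses a hypothetical design to show how a three-element group containing 'n' expands into a contiguous range while other groups pass through unchanged.

#Hypothetical mutcodons value and the expansion performed above
from ast import literal_eval

mutcodons = "[[100, 'n', 103], [110, 115, 120]]"   # hypothetical design

list_mutation_design = []
for group in literal_eval(mutcodons):
    if 'n' in group and len(group) == 3:
        #Expand the start/end pair around 'n' into every position in between
        list_mutation_design.append(list(range(group[0], group[-1] + 1)))
    else:
        list_mutation_design.append(group)

print(list_mutation_design)   # [[100, 101, 102, 103], [110, 115, 120]]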
Example No. 4
    def combine_pact(self):
        """From the config file it loads and combines pact files"""
        
        #For each dataset parse the config file for the files
        for dataset in self.dict_combined:

            #Inform the user
            print("[Combine Pact] Merging " + dataset)

            #Test if the section is present
            if not self.config_file.has_section(dataset):
                print("[Combine Pact Error] The config file is missing the section for " + dataset)

            #Load all of the files into a dict
            try:
                dict_files = dict(self.config_file.items(dataset))
            except NoSectionError:
                print("[Combine PACT] Can not find the dataset given, check config file.")
                quit()

            #Load the file
            for file in dict_files:

                #See if the file exists
                if file_checker(dict_files[file]):

                    #Load the pact file
                    with open(dict_files[file], 'rb') as infile:
                        try:
                            pact_dict = load(infile)
                        except UnpicklingError:
                            print("[PACT Error] Check your .pact filename.")
                            quit()

                    #Add the pact file to the dataset dict
                    for key in pact_dict:

                        #Warn the user if the key exists
                        if key in self.dict_combined[dataset]:
                            print("[Combine Pact Warning] Key: " + str(key) + " exists in dataset " + dataset)

                        #Add to the global dict
                        self.dict_combined[dataset][key] = pact_dict[key]

        return self.dict_combined
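
A .pact file here appears to be a pickled dict. The minimal sketch below works under that assumption, with hypothetical filenames and keys, writing two such files and merging them the same way combine_pact does.

#Hypothetical .pact files (pickled dicts) and a combine_pact-style merge
from pickle import dump, load

#Write two hypothetical .pact files
for name, data in [('run_a.pact', {'L10A': 0.5}), ('run_b.pact', {'L10C': -1.2})]:
    with open(name, 'wb') as outfile:
        dump(data, outfile)

#Merge them, warning on duplicate keys as combine_pact does
combined = {}
for name in ('run_a.pact', 'run_b.pact'):
    with open(name, 'rb') as infile:
        pact_dict = load(infile)
    for key in pact_dict:
        if key in combined:
            print("[Warning] Key: " + str(key) + " already present")
        combined[key] = pact_dict[key]

print(combined)   # {'L10A': 0.5, 'L10C': -1.2}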
Example No. 5
#Set the author information
__author__ = "Justin R. Klesmith and Benjamin J. Hackel"
__copyright__ = [
    "Copyright (C) 2018 by Regents of the University of Minnesota",
    "Copyright (C) 2018 Justin R. Klesmith"
]
__license__ = "GPL-3.0"
__version__ = "2018.6"
__maintainer__ = "Justin R. Klesmith"
__email__ = ["*****@*****.**", "*****@*****.**"]

#Get the PACT script path
pact_path = dirname(realpath(argv[0]))

#Check to see if the protocols links exist
if file_checker(pact_path + "/pact/pact_protocols.ini"):
    #Load the module links config file into a dictionary
    config_parser = ConfigParser()

    #Load the global and workflow config elements
    config_parser.read(pact_path + "/pact/pact_protocols.ini")

    #Map each protocol name to its module
    try:
        dict_protocols = {
            mapping[0].lower(): mapping[1].lower()
            for mapping in config_parser.items("protocols")
        }
    except NoSectionError:
        print("[PACT Error] The config file pact_protocol.ini is incorrect.")
        print(
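
Below is a sketch of the [protocols] section that this loop expects in pact_protocols.ini. The protocol-to-module mappings are hypothetical, and ConfigParser already lower-cases option names.

#Hypothetical [protocols] section for pact_protocols.ini
from configparser import ConfigParser

config_parser = ConfigParser()
config_parser.read_string("""
[protocols]
fitness = pact.protocols.fitness
combine = pact.protocols.combine
""")

dict_protocols = {
    mapping[0].lower(): mapping[1].lower()
    for mapping in config_parser.items("protocols")
}
print(dict_protocols)   # {'fitness': 'pact.protocols.fitness', 'combine': 'pact.protocols.combine'}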
Example No. 6
    def __init__(self, settings_dict):
        """Initialize the class varibles"""
        
        #Assign general DNA and AA settings
        self.wtdna = settings_dict['WTDNA']
        self.FirstAAMutated = int(settings_dict['FirstAAMutated'])-1
        self.LastAAMutated = int(settings_dict['LastAAMutated'])-1
        self.WTDNARegion = self.wtdna[self.FirstAAMutated*3:(self.LastAAMutated + 1) * 3]

        #Setup the output file prefix
        self.out_prefix = settings_dict['Out_Prefix']

        #Check our imported files
        if file_checker(settings_dict["Ref_Count_WildType"]):
            self.file_ref_count_wt = settings_dict["Ref_Count_WildType"]
        
        if file_checker(settings_dict["Sel_Count_WildType"]):
            self.file_sel_count_wt = settings_dict["Sel_Count_WildType"]
        
        if file_checker(settings_dict["Ref_Count"]):
            self.file_ref_count = settings_dict["Ref_Count"]        
            
        if file_checker(settings_dict["Sel_Count"]):
            self.file_sel_count = settings_dict["Sel_Count"]           
            
        if file_checker(settings_dict["Ref_Count_Rejected"]):
            self.file_ref_count_rejected = settings_dict["Ref_Count_Rejected"]        
            
        if file_checker(settings_dict["Sel_Count_Rejected"]):
            self.file_sel_count_rejected = settings_dict["Sel_Count_Rejected"]         

        #Set our thresholds
        try:
            self.ref_count_threshold = int(settings_dict["Ref_Count_Threshold"])
            self.sel_count_threshold = int(settings_dict["Sel_Count_Threshold"])
        except ValueError:
            print("[Error] The read count threshold is incorrectly set, defaulting to 12.")
            self.ref_count_threshold = 12
            self.sel_count_threshold = 12

        # 
        # do we enforce a strict count threshold?
        # 2022.1 new modes added
        # ref-only and sel-only to strictly enforce one or the other
        #
        try:
            if (settings_dict["Strict_Count_Threshold"].lower() == "true" or
            settings_dict["Strict_Count_Threshold"].lower() == "both"):
                self.strict_threshold = 10
            elif settings_dict["Strict_Count_Threshold"].lower() == "ref-only":
                self.strict_threshold = 11            
            elif settings_dict["Strict_Count_Threshold"].lower() == "sel-only":
                self.strict_threshold = 12            
            else:
                self.strict_threshold = 20
        except (KeyError, AttributeError):
            #Option missing or malformed: do not enforce a strict threshold
            self.strict_threshold = 20

        #
        # do we consider mutations rejected in our design yet pass
        # our read count filters to be in the total library count value?
        # 2022.1 - default is now false as theoretically a large amount
        # of non-designed mutations could skew the dataset distribution
        # in the non-normalized enrichments
        #
        try:
            if settings_dict["consider_rejected"].lower() == "true":
                self.consider_rejected = True
            else:
                self.consider_rejected = False
        except (KeyError, AttributeError):
            #Option missing or malformed: default to excluding rejected mutations
            self.consider_rejected = False
            
        #Import the mutation design list
        self.list_mutation_design = []
        for group in literal_eval(settings_dict['mutcodons']):
            #If we have n between two numbers
            if 'n' in group:
                #We need exactly three to expand the range
                if len(group) == 3:
                    #Get the start and end position, then iterate through the two points
                    self.list_mutation_design.append(list(i for i in range(group[0], group[-1] + 1)))
                else:
                    print("[Enrichment Error] The codon list is missing either a start or end point around the 'n' location.")
                    quit()
            else:
                #Add the other ranges without changing them
                self.list_mutation_design.append(group)
        return
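
A hedged example of the settings_dict this constructor expects is shown below. Every file name and value is hypothetical, and the values are strings because they would normally come straight from a ConfigParser.

#Hypothetical settings for the enrichment constructor above
settings_dict = {
    'WTDNA': 'ATGGCTAAATGCGATCAT',        # made-up 6-codon wild-type DNA
    'FirstAAMutated': '2',
    'LastAAMutated': '5',
    'Out_Prefix': './output/run1',
    'mutcodons': "[[2, 'n', 5]]",
    'Ref_Count_WildType': 'run1_Ref_Counted_WildType.tsv',
    'Sel_Count_WildType': 'run1_Sel_Counted_WildType.tsv',
    'Ref_Count': 'run1_Ref_Counted.tsv',
    'Sel_Count': 'run1_Sel_Counted.tsv',
    'Ref_Count_Rejected': 'run1_Ref_Counted_Rejected.tsv',
    'Sel_Count_Rejected': 'run1_Sel_Counted_Rejected.tsv',
    'Ref_Count_Threshold': '12',
    'Sel_Count_Threshold': '12',
    'Strict_Count_Threshold': 'both',     # true/both, ref-only, sel-only, or anything else for off
    'consider_rejected': 'false',
}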
Example No. 7
    def xml_to_fasta(self):
        """Convert the XML file into a fasta file for CDHIT"""

        #Assign these but catch value errors
        try:
            minquerylen = float(self.config_file.get("blastp_align_filter", "minquerylen"))
            minseqid = float(self.config_file.get("blastp_align_filter", "minseqid"))
        except ValueError:
            print("Incorrect value for the blastp_align_filter settings (string entered when it should be a number).")
            quit()

        #Get the input files
        self.file_ncbixml = self.config_file.get("blastp_align_filter", "ncbi_xml")

        #Import our xml tools
        from xml.etree import cElementTree

        #Check inputs for valid values
        file_checker(self.directory + self.file_ncbixml)

        if minquerylen > 1:
            print("[xml_to_fasta] The minimum alignment-to-query length ratio is >1, setting to 1. (Valid values: 0.0 to 1.0)")
            minquerylen = 1
        elif minquerylen < 0:
            print("[xml_to_fasta] The minimum alignment-to-query length ratio is <0, setting to 0. (Valid values: 0.0 to 1.0)")
            minquerylen = 0

        if minseqid > 1:
            print("[xml_to_fasta] The minimum sequence identity is >1, setting to 1. (Valid values: 0.0 to 1.0)")
            minseqid = 1
        elif minseqid < 0:
            print("[xml_to_fasta] The minimum sequence identity is <0, setting to 0. (Valid values: 0.0 to 1.0)")
            minseqid = 0
    
        #Flag to see if our WT is in the XML input
        bool_wt_in_ncbi = False 

        #Write a FASTA file built from the NCBI XML hits
        with open(self.directory + self.proteinname + ".fa", 'w') as file_fa:
            
            #Parse the XML file
            for event, elem in cElementTree.iterparse(self.directory + self.file_ncbixml):
                
                #Parse each hit
                if elem.tag == "Hit":
                    hit_id = elem.find("Hit_id").text
                    hit_name = elem.find("Hit_def").text
                    hit_accession = elem.find("Hit_accession").text
                    hit_identities = int(elem.find("Hit_hsps").find("Hsp").find("Hsp_identity").text)
                    hit_alignlen = int(elem.find("Hit_hsps").find("Hsp").find("Hsp_align-len").text)
                    hit_sequence = elem.find("Hit_hsps").find("Hsp").find("Hsp_hseq").text
            
                    #Verify that the alignment covers at least minquerylen of the query length
                    if float(hit_alignlen/self.wtlen) < minquerylen:
                        elem.clear()
                        continue

                    #Verify that the sequence identity is at least minseqid
                    if float(hit_identities/hit_alignlen) < minseqid:
                        elem.clear()
                        continue

                    #If there are non-standard amino acids, skip the hit
                    if any(c in hit_sequence for c in ['B', 'J', 'O', 'U', 'X', 'Z']):
                        elem.clear()
                        continue

                    #Remove dashes from the ncbi match (cd-hit errors them out as bad formatting)
                    hit_sequence_nodash = "".join(char for char in hit_sequence if char != "-")

                    #Check to see if the sequence was WT
                    if hit_sequence_nodash == self.wtaa:
                        bool_wt_in_ncbi = True

                        #Write the wild-type to the file as a standard FASTA record
                        file_fa.write(">" + self.proteinname + '\n')
                        file_fa.write(hit_sequence_nodash + '\n\n')
                    else:
                        #Write the other matches to the file
                        file_fa.write(">" + hit_accession + "_" + hit_id + "_" + hit_name + '\n')
                        file_fa.write(hit_sequence_nodash + '\n\n')
            
                    elem.clear()

            #If WT was not in the NCBI input then add it
            if not bool_wt_in_ncbi:
                file_fa.write(">" + self.proteinname + '\n')
                file_fa.write(self.wtaa + '\n')

        return
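
The .fa file written above is intended for CD-HIT. Below is a minimal sketch of invoking it, assuming cd-hit is on the PATH and using only its basic -i/-o/-c flags with hypothetical file names.

#Hypothetical CD-HIT invocation on the FASTA written above
from subprocess import run

run([
    "cd-hit",
    "-i", "./myprotein.fa",        # hypothetical input written by xml_to_fasta
    "-o", "./myprotein_cdhit.fa",  # clustered output
    "-c", "0.9",                   # cluster at 90% sequence identity
], check=True)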
Example No. 8
    def process_msa(self):
        """Convert the MSA for use with PSI-Blast"""

        #ProcessMSA Settings
        try:
            nummaxhits = int(self.config_file.get("blastp_align_filter", "nummaxhits"))
        except ValueError:
            print("[Homology Error] An incorrect value in the input for max num hits, will be set as unlimited.")
            nummaxhits = -1

        #Check inputs for valid values
        file_checker(self.directory + self.proteinname + ".msa")

        #Step one: Import msa alignment from MUSCLE and make it one line per sequence
        with open(self.directory + self.proteinname + ".msa", 'r') as infile:

            alignment = ""
            output = ""
            for line in infile:
                #Check to see if we have a header
                if line[0] == ">":
                    #Ignores empty output
                    if len(output) > 0:
                        #Add the current output to the growing alignment varible
                        alignment = alignment + output + "\n"

                    #Empty the current alignment
                    output = ""

                    #Assemble the first line of the new sequence
                    output = output + line.rstrip('\n') + "\t"
                else:
                    #Keep assembling the line
                    output = output + line.rstrip('\n')

            #Append the final record that is still held in output
            if len(output) > 0:
                alignment = alignment + output + "\n"

        #Step two: Import MSA into a lookup table
        dict_msa_table = {line.split("\t")[0]:line.split("\t")[1].rstrip("\n")
                         for line in alignment.split("\n")
                         if len(line) > 10}

        #Step three: Mark the insertions with the letter Z (fixed from X as X is also used in the blast hits)
        #Step four: Delete the insertions
        string_wt_msa = dict_msa_table[">" + self.proteinname] #Get the wild-type MSA sequence (with insertions)

        #Loop the entries in the dict
        for msa in dict_msa_table:

            #Go through the length of the entire msa string and mark it Z
            temp_zstr = "".join("Z" if string_wt_msa[i] == "-" 
                                else dict_msa_table[msa][i] 
                                for i in range(0, len(string_wt_msa)))

            #Now let's delete the Z'ed characters
            temp_zstr = "".join(char for char in temp_zstr if char != "Z")
        
            #Update the dict
            dict_msa_table[msa] = temp_zstr

        #Step five: Put wild-type on top, re-order the sequences by completeness, and cap the hits for psi-blast
        list_msa_ordered = []
        for msa in dict_msa_table:
            
            #Set the WT to the top and then add the rest into a list with their counts of dashes
            if dict_msa_table[msa] == self.wtaa:
                #Move the wild-type to the top
                list_msa_ordered.insert(0, {'sequence' : dict_msa_table[msa], 
                                            'counts' : dict_msa_table[msa].count('-'),
                                            'wt' : True})
            else:
                list_msa_ordered.append({'sequence' : dict_msa_table[msa], 
                                         'counts' : dict_msa_table[msa].count('-'),
                                         'wt' : False})

        #Sort the table into a new list by counts and wt
        list_msa_ordered.sort(key=lambda k : (k['counts'], -k['wt']))

        #Create a list of just the sequences, capped at nummaxhits (values < 1 mean unlimited)
        if nummaxhits < 1:
            list_msa_pb = [x['sequence'] for x in list_msa_ordered]
        else:
            list_msa_pb = [x['sequence'] for x in list_msa_ordered][:nummaxhits]

        return list_msa_pb
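
A toy walk-through of steps three and four above, with made-up sequences: any column where the wild-type MSA row holds a dash (an insertion relative to wild type) is first marked with Z and then deleted from the hit.

#Toy illustration of the Z-mark-and-delete insertion removal above
string_wt_msa = "AC-DE"      # wild type with one insertion column
hit_msa       = "ACWDE"      # a hit that has a residue in that column

temp_zstr = "".join("Z" if string_wt_msa[i] == "-" else hit_msa[i]
                    for i in range(len(string_wt_msa)))
print(temp_zstr)                                   # ACZDE
print("".join(c for c in temp_zstr if c != "Z"))   # ACDE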
Example No. 9
    def protocol(self):
        """Main entrypoint for the protocol"""

        #Create a output log file that we can append to
        with open(
                self.directory + self.output_prefix + "_" +
                strftime("%m_%d_%Y") + "-" + strftime("%H_%M_%S") +
                '_output.txt', 'w') as file_output:
            file_output.write(self.pact_preamble + "\n")
            """
            *****************************************
            FASTQ Merge Section
            *****************************************
            """
            if self.dict_workflow['fastq_merge_sel'] or self.dict_workflow[
                    'fastq_merge_ref']:
                #Import our fastq_merge file
                try:
                    from pact.sequencing.fastq_merge import fastq_merge
                except ImportError:
                    print(
                        "[Protocols:Fitness Error] fastq_merge was not found.")

            if self.dict_workflow['fastq_merge_ref']:
                #Set custom locations for the fastq files
                if self.obj_cfgparser.get('fastq_merge_ref',
                                          'directory') != "":
                    ref_fastq_dir = self.obj_cfgparser.get(
                        'fastq_merge_ref', 'directory')
                else:
                    ref_fastq_dir = self.directory

                if self.obj_cfgparser.get(
                        'fastq_merge_ref',
                        'forward_fastq') == self.obj_cfgparser.get(
                            'fastq_merge_ref', 'reverse_fastq'):
                    print(
                        "[Protocols:Fitness Error] The reference forward and reverse fastq files are the same."
                    )
                    quit()

                #Create our options dicts
                try:
                    dict_fastqmerge_options_ref = {
                        'Processes':
                        self.processes,
                        'Forward_FASTQ':
                        ref_fastq_dir + self.obj_cfgparser.get(
                            'fastq_merge_ref', 'forward_fastq'),
                        'Reverse_FASTQ':
                        ref_fastq_dir + self.obj_cfgparser.get(
                            'fastq_merge_ref', 'reverse_fastq'),
                        'Out_Prefix':
                        self.directory + self.output_prefix + "_Ref",
                        'Min_Coverage':
                        float(
                            self.obj_cfgparser.get('fastq_merge_ref',
                                                   'min_coverage')),
                    }
                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_merge config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [fastq_merge_X] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_merge config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Create the object then call the first file
                obj_fastqmerge_ref = fastq_merge(dict_fastqmerge_options_ref)

                print("[Protocols:Fitness] Merging the reference fastq files.")
                file_output.write(
                    "[Protocols:Fitness] Merging the reference fastq files.\n")
                file_output.write(obj_fastqmerge_ref.fastq_merge() + "\n")

            if self.dict_workflow['fastq_merge_sel']:
                #Set custom locations for the fastq files
                if self.obj_cfgparser.get('fastq_merge_sel',
                                          'directory') != "":
                    sel_fastq_dir = self.obj_cfgparser.get(
                        'fastq_merge_sel', 'directory')
                else:
                    sel_fastq_dir = self.directory

                #Do a check if the user accidentally lists the same file for the fwd and rev
                if self.obj_cfgparser.get(
                        'fastq_merge_sel',
                        'forward_fastq') == self.obj_cfgparser.get(
                            'fastq_merge_sel', 'reverse_fastq'):
                    print(
                        "[Protocols:Fitness Error] The selected forward and reverse fastq files are the same."
                    )
                    quit()

                #Create our options dicts
                try:
                    dict_fastqmerge_options_sel = {
                        'Processes':
                        self.processes,
                        'Forward_FASTQ':
                        sel_fastq_dir + self.obj_cfgparser.get(
                            'fastq_merge_sel', 'forward_fastq'),
                        'Reverse_FASTQ':
                        sel_fastq_dir + self.obj_cfgparser.get(
                            'fastq_merge_sel', 'reverse_fastq'),
                        'Out_Prefix':
                        self.directory + self.output_prefix + "_Sel",
                        'Min_Coverage':
                        float(
                            self.obj_cfgparser.get('fastq_merge_sel',
                                                   'min_coverage')),
                    }
                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_merge config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [fastq_merge_X] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_merge config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Create the object then call the first file
                obj_fastqmerge_sel = fastq_merge(dict_fastqmerge_options_sel)

                print("[Protocols:Fitness] Merging the selected fastq files.")
                file_output.write(
                    "[Protocols:Fitness] Merging the selected fastq files.\n")
                file_output.write(obj_fastqmerge_sel.fastq_merge() + "\n")
            """
            *****************************************
            FASTQ Filter Section
            *****************************************
            """
            if self.dict_workflow[
                    'fastq_filter_translate_ref'] or self.dict_workflow[
                        'fastq_filter_translate_sel']:
                #Import our fastq_reader file
                try:
                    from pact.sequencing.fastq_filter_translate import fastq_filter_translate
                except ImportError:
                    print(
                        "[Protocols:Fitness Error] fastq_filter_translate was not found."
                    )

            if self.dict_workflow['fastq_filter_translate_ref']:

                try:
                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get(
                                'fastq_filter_translate_ref',
                                'fastq_file')) > 0:
                        file_input_fastq_ref = self.obj_cfgparser.get(
                            'fastq_filter_translate_ref', 'fastq_file')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Ref_Merge.fastq"):
                            file_input_fastq_ref = self.directory + self.output_prefix + "_Ref_Merge.fastq"

                    dict_fastqread_options_ref = {
                        'WTDNA':
                        self.wtdna,
                        'WTAA':
                        self.wtaa,
                        'FirstAAMutated':
                        self.firstaamutated,
                        'LastAAMutated':
                        self.lastaamutated,
                        'Processes':
                        self.processes,
                        '5pAnchor':
                        self.obj_cfgparser.get('fastq_filter_translate_ref',
                                               'fiveprimeanchor'),
                        'MutThreshold':
                        self.mutthreshold,
                        'QAverage':
                        self.obj_cfgparser.get('fastq_filter_translate_ref',
                                               'qaverage'),
                        'QLimit':
                        self.obj_cfgparser.get('fastq_filter_translate_ref',
                                               'qlimit'),
                        'In_File':
                        file_input_fastq_ref,
                        'Out_Prefix':
                        self.directory + self.output_prefix + "_Ref",
                        'Enable_Anchors':
                        self.obj_cfgparser.get('fastq_filter_translate_ref',
                                               'enable_anchors')
                    }
                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_filter_translate_ref config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [fastq_filter_translate_ref] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_filter_translate_ref config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Create the object then call the first file
                obj_fastqfilter_ref = fastq_filter_translate(
                    dict_fastqread_options_ref)

                print("[Protocols:Fitness] Reading the reference fastq files.")
                file_output.write(
                    "[Protocols:Fitness] Reading the reference fastq files.\n")
                file_output.write(
                    obj_fastqfilter_ref.fastq_filter_translate() + "\n")

            if self.dict_workflow['fastq_filter_translate_sel']:

                try:
                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get(
                                'fastq_filter_translate_sel',
                                'fastq_file')) > 0:
                        file_input_fastq_sel = self.obj_cfgparser.get(
                            'fastq_filter_translate_sel', 'fastq_file')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Sel_Merge.fastq"):
                            file_input_fastq_sel = self.directory + self.output_prefix + "_Sel_Merge.fastq"

                    dict_fastqread_options_sel = {
                        'WTDNA':
                        self.wtdna,
                        'WTAA':
                        self.wtaa,
                        'FirstAAMutated':
                        self.firstaamutated,
                        'LastAAMutated':
                        self.lastaamutated,
                        'Processes':
                        self.processes,
                        '5pAnchor':
                        self.obj_cfgparser.get('fastq_filter_translate_sel',
                                               'fiveprimeanchor'),
                        'MutThreshold':
                        self.mutthreshold,
                        'QAverage':
                        self.obj_cfgparser.get('fastq_filter_translate_sel',
                                               'qaverage'),
                        'QLimit':
                        self.obj_cfgparser.get('fastq_filter_translate_sel',
                                               'qlimit'),
                        'In_File':
                        file_input_fastq_sel,
                        'Out_Prefix':
                        self.directory + self.output_prefix + "_Sel",
                        'Enable_Anchors':
                        self.obj_cfgparser.get('fastq_filter_translate_sel',
                                               'enable_anchors')
                    }

                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_filter_translate_sel config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [fastq_filter_translate_sel] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The fastq_filter_translate_sel config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Create the object then call the first file
                obj_fastqfilter_sel = fastq_filter_translate(
                    dict_fastqread_options_sel)

                print("[Protocols:Fitness] Reading the selected fastq files.")
                file_output.write(
                    "[Protocols:Fitness] Reading the selected fastq files.\n")
                file_output.write(
                    obj_fastqfilter_sel.fastq_filter_translate() + "\n")
            """
            *****************************************
            Filter Counter Section
            *****************************************
            """
            if self.dict_workflow['filter_counter_sel'] or self.dict_workflow[
                    'filter_counter_ref']:
                #Import our filter_counter file
                try:
                    from pact.sequencing.mut_filter import mut_filter
                    from pact.sequencing.mut_counter import mut_counter
                except ImportError:
                    print(
                        "[Protocols:Fitness Error] mut_filter was not found.")

            if self.dict_workflow['filter_counter_ref']:
                try:
                    if len(
                            self.obj_cfgparser.get('filter_counter_ref',
                                                   'read_file')) > 0:
                        file_input_fastqread_ref = self.obj_cfgparser.get(
                            'filter_counter_ref', 'read_file')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Ref_Read.tsv"):
                            file_input_fastqread_ref = self.directory + self.output_prefix + "_Ref_Read.tsv"

                    dict_filtercounter_options_ref = {
                        'WTDNA': self.wtdna,
                        'WTAA': self.wtaa,
                        'FirstAAMutated': self.firstaamutated,
                        'LastAAMutated': self.lastaamutated,
                        'MutThreshold': self.mutthreshold,
                        'Processes': self.processes,
                        'In_File': file_input_fastqread_ref,
                        'Out_Prefix':
                        self.directory + self.output_prefix + "_Ref",
                        'mutcodons': self.mutcodons,
                    }
                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The filter_counter config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [filter_counter_X] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The filter_counter config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Create the object then call the first file
                obj_filter_ref = mut_filter(dict_filtercounter_options_ref)

                print("[Protocols:Fitness] Filtering the reference tsv.")
                file_output.write(
                    "[Protocols:Fitness] Filtering the reference tsv.\n")
                file_output.write(obj_filter_ref.mut_filter() + "\n")

                #Create the object then call the first file
                obj_counter_ref = mut_counter(dict_filtercounter_options_ref)

                print("[Protocols:Fitness] Counting the reference mutations.")
                file_output.write(
                    "[Protocols:Fitness] Counting the reference mutations.\n")
                file_output.write(obj_counter_ref.mut_counter() + "\n")

            if self.dict_workflow['filter_counter_sel']:

                try:
                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get('filter_counter_sel',
                                                   'read_file')) > 0:
                        file_input_fastqread_sel = self.obj_cfgparser.get(
                            'filter_counter_sel', 'read_file')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Sel_Read.tsv"):
                            file_input_fastqread_sel = self.directory + self.output_prefix + "_Sel_Read.tsv"

                    dict_filtercounter_options_sel = {
                        'WTDNA': self.wtdna,
                        'WTAA': self.wtaa,
                        'FirstAAMutated': self.firstaamutated,
                        'LastAAMutated': self.lastaamutated,
                        'MutThreshold': self.mutthreshold,
                        'Processes': self.processes,
                        'In_File': file_input_fastqread_sel,
                        'Out_Prefix':
                        self.directory + self.output_prefix + "_Sel",
                        'mutcodons': self.mutcodons,
                    }

                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The filter_counter config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [filter_counter_X] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The filter_counter config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Create the object then call the first file
                obj_filter = mut_filter(dict_filtercounter_options_sel)

                print("[Protocols:Fitness] Filtering the selected tsv.")
                file_output.write(
                    "[Protocols:Fitness] Filtering the selected tsv.\n")
                file_output.write(obj_filter.mut_filter() + "\n")

                #Create the object then call the first file
                obj_counter_sel = mut_counter(dict_filtercounter_options_sel)

                print("[Protocols:Fitness] Counting the selected mutations.")
                file_output.write(
                    "[Protocols:Fitness] Counting the selected mutations.\n")
                file_output.write(obj_counter_sel.mut_counter() + "\n")
            """
            *****************************************
            Enrichment Section
            *****************************************
            """
            if self.dict_workflow['enrichment']:

                # do we consider rejected mutations (based on design) from the total count?
                try:
                    consider_rejected = self.obj_cfgparser.get(
                        'enrichment', 'consider_rejected')
                except NoOptionError:
                    consider_rejected = 'false'

                try:
                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get('enrichment',
                                                   'ref_count_wildtype')) > 0:
                        file_input_countwt_ref = self.obj_cfgparser.get(
                            'enrichment', 'ref_count_wildtype')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Ref_Counted_WildType.tsv"):
                            file_input_countwt_ref = self.directory + self.output_prefix + "_Ref_Counted_WildType.tsv"

                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get('enrichment',
                                                   'sel_count_wildtype')) > 0:
                        file_input_countwt_sel = self.obj_cfgparser.get(
                            'enrichment', 'sel_count_wildtype')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Sel_Counted_WildType.tsv"):
                            file_input_countwt_sel = self.directory + self.output_prefix + "_Sel_Counted_WildType.tsv"

                    #Test if we have a special input file
                    if len(self.obj_cfgparser.get('enrichment',
                                                  'ref_count')) > 0:
                        file_input_count_ref = self.obj_cfgparser.get(
                            'enrichment', 'ref_count')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Ref_Counted.tsv"):
                            file_input_count_ref = self.directory + self.output_prefix + "_Ref_Counted.tsv"

                    #Test if we have a special input file
                    if len(self.obj_cfgparser.get('enrichment',
                                                  'sel_count')) > 0:
                        file_input_count_sel = self.obj_cfgparser.get(
                            'enrichment', 'sel_count')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Sel_Counted.tsv"):
                            file_input_count_sel = self.directory + self.output_prefix + "_Sel_Counted.tsv"

                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get('enrichment',
                                                   'ref_count_rejected')) > 0:
                        file_input_countrej_ref = self.obj_cfgparser.get(
                            'enrichment', 'ref_count_rejected')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Ref_Counted_Rejected.tsv"):
                            file_input_countrej_ref = self.directory + self.output_prefix + "_Ref_Counted_Rejected.tsv"

                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get('enrichment',
                                                   'sel_count_rejected')) > 0:
                        file_input_countrej_sel = self.obj_cfgparser.get(
                            'enrichment', 'sel_count_rejected')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_Sel_Counted_Rejected.tsv"):
                            file_input_countrej_sel = self.directory + self.output_prefix + "_Sel_Counted_Rejected.tsv"

                    enrichment_settings = {
                        'WTDNA':
                        self.wtdna,
                        'FirstAAMutated':
                        self.firstaamutated,
                        'LastAAMutated':
                        self.lastaamutated,
                        'Out_Prefix':
                        self.directory + self.output_prefix,
                        'mutcodons':
                        self.mutcodons,
                        "Ref_Count_WildType":
                        file_input_countwt_ref,
                        "Sel_Count_WildType":
                        file_input_countwt_sel,
                        "Ref_Count":
                        file_input_count_ref,
                        "Sel_Count":
                        file_input_count_sel,
                        "Ref_Count_Rejected":
                        file_input_countrej_ref,
                        "Sel_Count_Rejected":
                        file_input_countrej_sel,
                        "Ref_Count_Threshold":
                        self.obj_cfgparser.get('enrichment',
                                               'ref_count_threshold'),
                        "Sel_Count_Threshold":
                        self.obj_cfgparser.get('enrichment',
                                               'sel_count_threshold'),
                        "Strict_Count_Threshold":
                        self.obj_cfgparser.get('enrichment',
                                               'strict_count_threshold'),
                        "consider_rejected":
                        consider_rejected,
                    }
                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The enrichment config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [enrichment] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The enrichment config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Import our enrichment_fitness file
                try:
                    from pact.sequencing.enrichment import enrichment
                except ImportError:
                    print(
                        "[Protocols:Fitness Error] enrichment was not found.")

                #Create the object
                obj_enrichment = enrichment(enrichment_settings)

                #Calculate the enrichment
                print("[Protocols:Fitness] Calculating the enrichment")
                file_output.write(
                    "[Protocols:Fitness] Calculating the enrichment\n" +
                    obj_enrichment.enrichment())
            """
            *****************************************
            Fitness Section
            *****************************************
            """
            if self.dict_workflow['fitness']:

                #Import our fitness file
                try:
                    from pact.sequencing.fitness import fitness
                except ImportError:
                    print("[Protocols:Fitness Error] fitness was not found.")

                #Create the object
                obj_fitness = fitness(
                    self.obj_cfgparser, self.dict_programs, {
                        'directory': self.directory,
                        'WTDNA': self.wtdna,
                        'WTAA': self.wtaa,
                        'FirstAAMutated': self.firstaamutated,
                        'LastAAMutated': self.lastaamutated,
                        'Out_Prefix': self.directory + self.output_prefix,
                        'mutcodons': self.mutcodons,
                        'library_type': self.mutationtype
                    })

                #Calculate the fitness
                print("[Protocols:Fitness] Calculating the fitness")
                file_output.write(
                    "[Protocols:Fitness] Calculating the fitness\n" +
                    obj_fitness.fitness())
            """
            *****************************************
            Calculate mutation freqs and mutual info
            *****************************************
            """
            if self.mutationtype == 'multiple':
                if self.dict_workflow['multiple_freq_mi']:
                    #Import our codon frequency and mutual information class
                    try:
                        from pact.sequencing.multiple_freq_mi import multiple_freq_mi
                    except ImportError:
                        print(
                            "[Protocols:Fitness Error] multiple_freq_mi was not found."
                        )

                    #Create the object
                    obj_freqmi = multiple_freq_mi(
                        self.obj_cfgparser, self.dict_programs, {
                            'directory': self.directory,
                            'WTDNA': self.wtdna,
                            'WTAA': self.wtaa,
                            'FirstAAMutated': self.firstaamutated,
                            'LastAAMutated': self.lastaamutated,
                            'Out_Prefix': self.directory + self.output_prefix,
                            'mutcodons': self.mutcodons,
                            'library_type': self.mutationtype
                        })

                    #Calculate the codon frequency and mutual information
                    print(
                        "[Protocols:Fitness] Calculating the codon frequency and mutual information"
                    )
                    file_output.write(
                        "[Protocols:Fitness] Calculating the codon frequency and mutual information\n"
                        + obj_freqmi.multiple_freq_mi())
            """
            *****************************************
            Library Stats Section
            *****************************************
            """
            if self.dict_workflow['library_stats']:

                try:
                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get(
                                'library_stats',
                                'pact_enrichment_summary')) > 0:
                        file_enrich_summary = self.obj_cfgparser.get(
                            'library_stats', 'pact_enrichment_summary')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_enrichment_summary.pact"):
                            file_enrich_summary = self.directory + self.output_prefix + "_enrichment_summary.pact"

                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get(
                                'library_stats', 'pact_fitness_nonsynon')) > 0:
                        file_fitness_nonsynon = self.obj_cfgparser.get(
                            'library_stats', 'pact_fitness_nonsynon')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_fitness_nonsynon.pact"):
                            file_fitness_nonsynon = self.directory + self.output_prefix + "_fitness_nonsynon.pact"

                    #Test if we have a special input file
                    if len(
                            self.obj_cfgparser.get(
                                'library_stats', 'pact_fitness_wtsynon')) > 0:
                        file_fitness_wtsynon = self.obj_cfgparser.get(
                            'library_stats', 'pact_fitness_wtsynon')
                    else:
                        if file_checker(self.directory + self.output_prefix +
                                        "_fitness_wtsynon.pact"):
                            file_fitness_wtsynon = self.directory + self.output_prefix + "_fitness_wtsynon.pact"

                    library_settings = {
                        'file_summary':
                        file_enrich_summary,
                        'pact_fitness_nonsynon':
                        file_fitness_nonsynon,
                        'pact_fitness_wtsynon':
                        file_fitness_wtsynon,
                        'WTDNA':
                        self.wtdna,
                        'WTAA':
                        self.wtaa,
                        'FirstAAMutated':
                        self.firstaamutated,
                        'LastAAMutated':
                        self.lastaamutated,
                        'Out_Prefix':
                        self.directory + self.output_prefix,
                        'mutcodons':
                        self.mutcodons,
                        'library_type':
                        self.mutationtype,
                        'codon_type':
                        self.obj_cfgparser.get('library_stats', 'codon_type'),
                    }
                except NoSectionError:
                    print(
                        "[Protocols:Fitness Error] The library_stats config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the [library_stats] section."
                    )
                    quit()
                except NoOptionError:
                    print(
                        "[Protocols:Fitness Error] The library_stats config file is incorrect."
                    )
                    print(
                        "[Protocols:Fitness Error] There is something wrong with the name of a option flag."
                    )
                    quit()

                #Import our library_stats file
                try:
                    from pact.sequencing.library_stats import library_stats
                except ImportError:
                    print(
                        "[Protocols:Fitness Error] library_stats was not found."
                    )

                #Create our library stats object
                obj_libstat = library_stats(library_settings)

                #Call our entrypoint method
                file_output.write(obj_libstat.library_stats() + "\n")

        return
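
Below is a hedged, heavily trimmed sketch of the config sections this protocol touches. Only the reference-side sections are shown (the matching _sel sections mirror them), every value is hypothetical, and the real protocol expects the full option set for each workflow step it runs.

#Hypothetical, abbreviated config for the fitness protocol above
from configparser import ConfigParser

obj_cfgparser = ConfigParser()
obj_cfgparser.read_string("""
[fastq_merge_ref]
directory =
forward_fastq = ref_R1.fastq
reverse_fastq = ref_R2.fastq
min_coverage = 0.9

[fastq_filter_translate_ref]
fastq_file =
fiveprimeanchor = ACGT
qaverage = 25
qlimit = 20
enable_anchors = true

[filter_counter_ref]
read_file =

[enrichment]
ref_count_wildtype =
sel_count_wildtype =
ref_count =
sel_count =
ref_count_rejected =
sel_count_rejected =
ref_count_threshold = 12
sel_count_threshold = 12
strict_count_threshold = both
consider_rejected = false

[library_stats]
pact_enrichment_summary =
pact_fitness_nonsynon =
pact_fitness_wtsynon =
codon_type = NNK
""")

#Empty values fall back to the default file names built from the output prefix above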
Example No. 10
    def read_csv_fitness(self):
        """From the config file it loads and combines pact files"""

        #For each dataset parse the config file for the files
        for dataset in self.dict_combined:

            #Inform the user
            print("[Read CSV] Merging " + dataset)

            #Test if the section is present
            if not self.config_file.has_section(dataset):
                print(
                    "[Read CSV Error] The config file is missing the section for "
                    + dataset)

            #Load all of the files into a dict
            try:
                dict_section = dict(self.config_file.items(dataset))
            except NoSectionError:
                print(
                    "[Read CSV] Cannot find the dataset given; check the config file."
                )
                quit()

            #See if the file exists
            if file_checker(dict_section['file']):

                #Load the pact file
                with open(dict_section['file'], 'r') as infile:
                    list_file = infile.readlines()

                #Figure out which column holds each field
                column_location = list_file[0].split(',').index(
                    dict_section['location'])
                column_mutation = list_file[0].split(',').index(
                    dict_section['mutation'])
                column_fitness = list_file[0].split(',').index(
                    dict_section['fitness'])

                #Read whether the dataset numbers residues from 0 or from 1
                starting_index = int(dict_section['starting_index'])

                #Convert the starting index into an offset into the wild-type sequence
                if starting_index == 0:
                    wt_offset = 0
                elif starting_index == 1:
                    wt_offset = -1
                else:
                    print("[Read CSV Error] starting_index must be 0 or 1.")
                    quit()

                #Add to our dict, skipping the header
                for line in list_file[1:]:

                    #Set our variables
                    loc = int(line.split(',')[column_location])
                    mut = line.split(',')[column_mutation]

                    #Add our location if not existing
                    if loc not in self.dict_combined[dataset]:
                        self.dict_combined[dataset][loc] = {}

                    #Warn the user if the key exists
                    if mut in self.dict_combined[dataset][loc]:
                        print("[Read CSV Warning] Key: " + str(loc) +
                              str(mut) + " exists in dataset " + dataset)

                    #Add to the global dict
                    fitness = line.split(',')[column_fitness]

                    if fitness == 'NS':
                        fitness = 'NaN'
                    else:
                        fitness = float(fitness)

                    #Get the wild-type residue
                    wt_resi = self.config_file.get(dataset,
                                                   'wtaa')[loc + wt_offset]

                    self.dict_combined[dataset][loc][mut] = {
                        'location': loc,
                        'fitness': fitness,
                        'mutation': mut,
                        'wt_residue': wt_resi,
                        'sd_from_wt': 'NaN'
                    }

            #Save a pact file for our dataset
            save_pact_file(
                self.dict_combined[dataset],
                self.dict_protocolconfig['directory'] + '/' + dataset)

        return self.dict_combined
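
To make the column lookup and the starting_index offset concrete, here is a small self-contained sketch; the column names, wild-type string, and values are all made up, and only the logic mirrors read_csv_fitness above.

#A hypothetical fitness CSV matching a config section with
#location = position, mutation = aa, fitness = score, starting_index = 1
csv_text = "position,aa,score\n2,A,0.85\n2,C,NS\n3,G,1.10\n"

#Find each column from the header, as read_csv_fitness does
header = csv_text.splitlines()[0].split(',')
column_location = header.index('position')   #0
column_fitness = header.index('score')       #2

#starting_index = 1 maps file position 2 onto wtaa[2 - 1]
wt_offset = -1
wtaa = 'MAKGE'
first_row = csv_text.splitlines()[1].split(',')
print(wtaa[int(first_row[column_location]) + wt_offset])   #prints 'A'
#Rows whose fitness column reads 'NS' are stored as 'NaN' by the real method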
Exemplo n.º 11
0
    def __init__(self, settings_dict):
        """Import our settings and saved config file."""

        #Assign general DNA and AA settings
        self.wtdna = settings_dict['WTDNA']
        self.wtaa = settings_dict['WTAA']
        self.FirstAAMutated = int(settings_dict['FirstAAMutated']) - 1
        self.LastAAMutated = int(settings_dict['LastAAMutated']) - 1
        self.out_prefix = settings_dict['Out_Prefix']

        #Import the library mode (Single or Multiple)
        if settings_dict['library_type'].lower() == 'single':
            self.library_type = "single"
        elif settings_dict['library_type'].lower() == 'multiple':
            self.library_type = "multiple"
        else:
            print(
                "[Library Stats Error] Unknown mode (expect single or multiple)."
            )
            quit()

        #Check our imported files and load
        if file_checker(settings_dict["file_summary"]):
            with open(settings_dict['file_summary'], 'rb') as file_summary:
                self.dict_summary = load(file_summary)

        if file_checker(settings_dict["pact_fitness_nonsynon"]):
            with open(settings_dict['pact_fitness_nonsynon'],
                      'rb') as file_accepted:
                self.fitness = load(file_accepted)

        if file_checker(settings_dict["pact_fitness_wtsynon"]):
            with open(settings_dict['pact_fitness_wtsynon'],
                      'rb') as file_wtsynon:
                self.dict_wtsynon = load(file_wtsynon)

        #Parse our pact files for global variables
        self.ref_threshold = int(self.dict_summary['ref_count_threshold'])
        self.sel_threshold = int(self.dict_summary['sel_count_threshold'])

        #Build the mutation design list
        self.list_mutation_design = []
        for group in literal_eval(settings_dict['mutcodons']):
            #If we have n between two numbers
            if 'n' in group:
                #We need exactly three to expand the range
                if len(group) == 3:
                    #Get the start and end position, then iterate through the two points
                    self.list_mutation_design.append(
                        list(i for i in range(group[0], group[-1] + 1)))
                else:
                    print(
                        "[Error] The codon list is missing either a start or end point around the 'n' location."
                    )
                    quit()
            else:
                #Add the other ranges without changing them
                self.list_mutation_design.append(group)

        #'codon_type':"{'NNK':[[161,'n',256]]}"
        #Build the codon design list
        list_codon_design = []
        dict_codon_design = literal_eval(settings_dict['codon_type'])

        #Enumerate each codon key
        for codon in dict_codon_design:

            #Enumerate the list
            for group in dict_codon_design[codon]:

                #If we have n between two numbers
                if 'n' in group:

                    #We need exactly three to expand the range
                    if len(group) == 3:
                        #Get the start and end position, then iterate through the two points
                        list_codon_design.append(
                            {i: codon
                             for i in range(group[0], group[-1] + 1)})
                    else:
                        print(
                            "[Error] The codon list is missing either a start or end point around the 'n' location."
                        )
                        quit()
                else:
                    #Add the other ranges without changing them
                    list_codon_design.append({j: codon for j in group})

        #Now we need to flatten and combine the dicts
        self.dict_codon_design = {
            k: v
            for list_entry in list_codon_design for k, v in list_entry.items()
        }

        return
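
A short standalone sketch (with assumed config strings) of how the constructor above expands the mutcodons ranges and flattens codon_type into a position-to-codon dict:

from ast import literal_eval

#The config stores these as strings; literal_eval turns them into Python objects
mutcodons = "[[2, 'n', 4], [7, 9]]"
codon_type = "{'NNK': [[2, 'n', 4]], 'NNN': [[7, 9]]}"

list_mutation_design = []
for group in literal_eval(mutcodons):
    if 'n' in group:
        #[start, 'n', end] expands to every position in the range
        list_mutation_design.append(list(range(group[0], group[-1] + 1)))
    else:
        list_mutation_design.append(group)

print(list_mutation_design)   #[[2, 3, 4], [7, 9]]

#The codon design flattens to one position -> codon dict
dict_codon_design = {}
for codon, groups in literal_eval(codon_type).items():
    for group in groups:
        positions = range(group[0], group[-1] + 1) if 'n' in group else group
        dict_codon_design.update({i: codon for i in positions})

print(dict_codon_design)      #{2: 'NNK', 3: 'NNK', 4: 'NNK', 7: 'NNN', 9: 'NNN'}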
Exemplo n.º 12
0
    def mutation_counter(self, file_in, file_out, header):
        """Open our filtered file and then count our mutations"""

        #Check if the file exists
        if not file_checker(file_in):
            print(
                "[Mutation Counter Error] Filtered tsv is missing and cannot be counted."
            )
            quit()

        #Open the input file and count each unique line
        try:
            #Set up the EOF flag, header flag, remainder string, and list of lines
            int_fileend = 1
            int_header = 1
            str_remainder = ''
            list_lines = []

            #Open our file and read in chunks
            with open(file_in, 'r') as infile:

                #Old Method
                #list_lines = infile.read().splitlines()[1:]

                while int_fileend:
                    #Read the next chunk and prepend the leftover partial line
                    chunk = infile.read(100000)
                    block = str_remainder + chunk

                    #EOF once a read returns less than a full chunk
                    if len(chunk) < 100000:
                        int_fileend = 0

                    #Split our line
                    splitline = block.split("\n")

                    #Add our line to the main list (minus the last line if we're not done)
                    for i in range(int_header, len(splitline) - int_fileend):

                        #Clear the header flag so only the very first line is skipped
                        int_header = 0

                        #Append the complete line
                        list_lines.append(splitline[i])

                    #Keep the trailing partial line as the remainder
                    str_remainder = splitline[-1]

            #Count each unique line, skipping empty lines
            counts = Counter(filter(None, list_lines))

            #Output the existing line and append the count to the end
            with open(file_out, 'w') as file_counted:
                file_counted.write(header)

                for line, count in counts.most_common():
                    file_counted.write(line + "\t" + str(count) + "\n")

        except MemoryError:
            print(
                "[Mutation Counter Error] Out of memory. Please install more ram or use 64-bit python."
            )
            quit()

        except OSError as error:
            #OSError 22 (EINVAL) when opening huge files, esp. on OSX; seen with large rejected-sequence files
            if error.errno == EINVAL:
                print(
                    "[Mutation Counter Error] Are you on OSX? Known Python bug when opening large files."
                )
                quit()
            else:
                raise

        return
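
As a quick illustration of what the counting step produces, here is a self-contained sketch using a tiny in-memory TSV with hypothetical sequences:

from collections import Counter

#Hypothetical filtered TSV: a header line followed by one sequence per line
tsv_lines = ["sequence", "ATGAAA", "ATGAAA", "ATGCCC", ""]

#Skip the header, drop empty lines, and count duplicates
counts = Counter(filter(None, tsv_lines[1:]))

for line, count in counts.most_common():
    print(line + "\t" + str(count))
#ATGAAA  2
#ATGCCC  1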
Exemplo n.º 13
0
    def __init__(self, obj_config, dict_programs, dict_protocolconfig):
        """Initialize the class varibles"""
        
        #Get the config file parser object from the calling class
        self.config_file = obj_config

        #Get the PACT dict programs
        self.dict_programs = dict_programs

        #Get the extra config options assigned by the protocol
        self.dict_protocolconfig = dict_protocolconfig

        #Check if the config file section is defined
        if not self.config_file.has_section("fitness"):
            print("[Fitness Error] the config file is missing the section [fitness]")
            quit()

        #Set the class specific variables assigned by the protocol
        self.directory = self.dict_protocolconfig['directory']
        self.out_prefix = self.dict_protocolconfig['Out_Prefix']
        self.wtdna = self.dict_protocolconfig['WTDNA']
        self.wtaa = self.dict_protocolconfig['WTAA']
        self.FirstAAMutated = int(self.dict_protocolconfig['FirstAAMutated'])-1
        self.LastAAMutated = int(self.dict_protocolconfig['LastAAMutated'])-1
        self.WTAARegion = self.wtaa[self.FirstAAMutated:self.LastAAMutated + 1]
        self.WTDNARegion = self.wtdna[self.FirstAAMutated*3:(self.LastAAMutated + 1) * 3]
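        #For example, config values FirstAAMutated = 3 and LastAAMutated = 5 become indices 2 and 4,
        #so WTAARegion = wtaa[2:5] (residues 3-5) and WTDNARegion = wtdna[6:15] (their codons)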
        
        #Import the library mode (Single or Multiple)
        if self.dict_protocolconfig['library_type'].lower() == 'single':
            self.library_type = "single"
        elif self.dict_protocolconfig['library_type'].lower() == 'multiple':
            self.library_type = "multiple"
        else:
            print("[Fitness Error] Unknown mode (expect single or multiple).")
            quit()

        #Import settings from the config file on file imports
        try:
            #Test if we have a special input file
            if len(self.config_file.get('fitness', 'pact_enrichment_summary')) > 0:
                file_pact_summary = self.config_file.get('fitness', 'pact_enrichment_summary')
            else:
                file_pact_summary = self.out_prefix + "_enrichment_summary.pact"

            #Test if we have a special input file
            if len(self.config_file.get('fitness', 'pact_enrichment_accept_nonsynon')) > 0:
                file_pact_accepted = self.config_file.get('fitness', 'pact_enrichment_accept_nonsynon')
            else:
                file_pact_accepted = self.out_prefix + "_enrichment_accept_nonsynon.pact"

            #Test if we have a special input file
            if len(self.config_file.get('fitness', 'pact_enrichment_wtsynon')) > 0:
                file_pact_wtsynon = self.config_file.get('fitness', 'pact_enrichment_wtsynon')
            else:
                file_pact_wtsynon = self.out_prefix + "_enrichment_wtsynon.pact"

            #Check our imported files and load
            if file_checker(file_pact_summary):
                with open(file_pact_summary, 'rb') as file_summary:
                    self.dict_summary = load(file_summary)

            if file_checker(file_pact_accepted):
                with open(file_pact_accepted, 'rb') as file_accepted:
                    self.dict_accepted = load(file_accepted)        

            if file_checker(file_pact_wtsynon):
                with open(file_pact_wtsynon, 'rb') as file_wtsynon:
                    self.dict_wtsynon = load(file_wtsynon)

        except NoOptionError:
            print("[Fitness Error] The fitness config file is incorrect.")
            print("[Fitness Error] Missing an option flag that starts with file_.")
            quit()

        #Check to see if we have a manual log2 enrichment
        if len(self.config_file.get('fitness', 'manual_log2')):
            try:
                self.wt_log2 = float(self.config_file.get('fitness', 'manual_log2'))
            except:
                self.wt_log2 = self.dict_summary['log2_wildtype']
        else:
            self.wt_log2 = self.dict_summary['log2_wildtype']

        #Handle the import of the fitness metric
        try:
            #Import the fitness metric type
            self.metric = self.config_file.get('fitness', 'metric').lower()

            #Check to see if the metric is any that we accept
            if self.metric not in ['e-wt', 'growth', 'facs']:
                print("[Fitness Error] Unknown fitness metric type.")
                quit()

            #Check to see if we have a number of doublings
            if self.metric == "growth":
                try:
                    self.growth_gp = float(self.config_file.get('fitness', 'growth_gp'))
                    self.growth_wildtype = ((self.wt_log2 / self.growth_gp) + 1)
                except:
                    print("[Fitness Error] The fitness config file is incorrect for growth_gp.")
                    quit()

            #Check to see if we have our sd or %collected for facs
            if self.metric == "facs":
                #See if erfinv is imported
                if not erfinv_import:
                    print("[Fitness Error] Inverse error function can not be loaded for FACS.")
                    quit()
            
                try:
                    self.facs_sd = float(self.config_file.get('fitness', 'facs_sd')) #Standard Deviation
                    self.facs_pc = float(self.config_file.get('fitness', 'facs_pc')) #Percent collected

                    #Print info from config file
                    print("[Fitness] FACS SD: " + str(round(self.facs_sd, 3)))
                    print("[Fitness] FACS percent collected: " + str(round(self.facs_pc, 3)))
                    print("[Fitness] WT log2 enrichment: " + str(round(self.wt_log2, 3)))

                    #Calculate the inner value
                    wt_facs_inner = 1 - self.facs_pc * pow(2, (self.wt_log2 + 1))

                    #erfinv is only defined on the open interval (-1, 1), so clamp values just inside it
                    if wt_facs_inner <= -1:
                        print("[Fitness] the WT facs equation is less than or equal to -1 for erfinv")
                        print("[Fitness] setting to -0.9999")
                        wt_facs_inner = -0.9999
                    elif wt_facs_inner >= 1:
                        print("[Fitness] the WT facs equation is greater than or equal to 1 for erfinv")
                        print("[Fitness] setting to 0.9999")
                        wt_facs_inner = 0.9999
            
                    #Theoretical maximum enrichment
                    self.e_max_theo_facs = -log(self.facs_pc, 2)
                    self.e_wildtype_facs = erfinv(wt_facs_inner)

                    #Print Theoretical Values
                    print("[Fitness] FACS equation max theo. enrichment: " + str(round(self.e_max_theo_facs, 3)))
                    print("[Fitness] FACS equation wild-type: " + str(round(self.e_wildtype_facs, 3)))

                except (ValueError, TypeError):
                    print("[Fitness Error] The fitness config file is incorrect for facs_sd or facs_pc.")
                    quit()

        except NoOptionError:
            print("[Fitness Error] The fitness config file is incorrect.")
            print("[Fitness Error] There is something wrong with the name of a option flag.")
            quit()

        #Check to see what type of expect value to calculate
        try:
            self.evalue_type = self.config_file.get('fitness', 'evalue_type').lower()

            #Check to see if the metric is any that we accept
            if self.evalue_type not in ['growth', 'facs']:
                self.evalue_type = None
                print("[Fitness] Not performing a t-test for variant significance.")

            #Prepare growth
            if self.evalue_type == "growth":
                #Create the pascal triangle list
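                #Row g of the triangle holds the binomial coefficients C(g, k);
                #e.g. growth_gp = 4 gives a final row of [1, 4, 6, 4, 1]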
                self.list_pascal = [[int((factorial(row)) / ((factorial(k)) * factorial(row - k))) 
                                 for k in range(row + 1)] 
                                 for row in range(int(self.growth_gp) + 1)]

            #Prepare FACS
            if self.evalue_type == "facs":
                try:
                    self.evalue_facs_cellcount = int(self.config_file.get('fitness', 'evalue_facs_cellcount'))
                except:
                    self.evalue_type = None
        except:
            self.evalue_type = None
            print("[Fitness] Not performing a t-test for variant significance.")

        #Import the mutation design list
        self.list_mutation_design = []
        for group in literal_eval(self.dict_protocolconfig['mutcodons']):
            #If we have n between two numbers
            if 'n' in group:
                #We need exactly three to expand the range
                if len(group) == 3:
                    #Get the start and end position, then iterate through the two points
                    self.list_mutation_design.append(list(i for i in range(group[0], group[-1] + 1)))
                else:
                    print("[Fitness Error] The codon list is missing either a start or end point around the 'n' location.")
                    quit()
            else:
                #Add the other ranges without changing them
                self.list_mutation_design.append(group)
        return
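
For intuition, here is a minimal standalone sketch of the FACS wild-type calculation done in the constructor above, assuming erfinv comes from scipy.special (the class itself only checks an erfinv_import flag, so the actual source may differ) and using made-up sort parameters:

from math import log

#Assumption: erfinv taken from scipy.special; the class only tests an erfinv_import flag
from scipy.special import erfinv

#Hypothetical FACS sort parameters
facs_pc = 0.25    #fraction of the population collected
wt_log2 = -0.30   #wild-type log2 enrichment from the PACT summary file

#Same quantities the constructor derives above
wt_facs_inner = 1 - facs_pc * pow(2, wt_log2 + 1)
wt_facs_inner = max(-0.9999, min(0.9999, wt_facs_inner))   #keep inside erfinv's (-1, 1) domain

e_max_theo_facs = -log(facs_pc, 2)         #theoretical maximum enrichment: 2.0 for a 25% gate
e_wildtype_facs = erfinv(wt_facs_inner)

print(round(e_max_theo_facs, 3), round(e_wildtype_facs, 3))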