Пример #1
0
def get_fastqc_choice(Fastqc, param):
    """
    Record whether a FastQC quality control has to be run.

    Takes two arguments :
        - Fastqc [ElementTree] : subtree which contains the choice of the
          user to do or not a quality control (one 'skip' child expected)
        - param [dict] : dictionary containing all parameters

    Returns param [dict]; the key 'fastqc' is set to 'yes' when the skip
    option is 'no', otherwise param is returned unchanged.

    Exits the program (sys.exit) when the section does not hold exactly one
    child, or when that child is not a 'skip' element.
    """
    # the backslash is escaped so the literal stays valid on recent Python
    # versions; the text shown to the user is unchanged
    section_error = ("/!\\ The XML file must contain only a skip option in "
                     "the section 'fastqc'!")

    # the section Fastqc must contain a single child ...
    if not ce.check_child_number(Fastqc, 1):
        sys.exit(section_error)

    # ... and that child must be the skip option: find() returns None when
    # the tag is absent, which would otherwise crash on '.text'
    skip_node = Fastqc.find('skip')
    if skip_node is None:
        sys.exit(section_error)

    # validation of the text is delegated to ce.check_yes_no; its return
    # value is what gets compared to 'no' below
    skip = ce.check_yes_no(skip_node.text, 'skip in fastqc')

    # quality control is requested only when it is not skipped
    if skip == 'no':
        param['fastqc'] = 'yes'

    return param
Пример #2
0
def separate_categories_Trimmo(Trimmomatic):
    """
    Split the Trimmomatic subtree into its three categories.

    Takes one argument : Trimmomatic [ElementTree]

    Returns three values, in this order :
        - Adapter [ElementTree] : subtree whose 'name' attribute is
          'adapter-trimming'
        - Quality [ElementTree] : subtree whose 'name' attribute is
          'quality-trimming'
        - Useful [ElementTree] : subtree whose 'name' attribute is
          'useful-parameters'

    Exits the program (sys.exit) when the number of categories is not 3,
    when a category name is not recognized, or when one of the three
    expected categories is missing (e.g. because another one is duplicated).
    """
    # check that the number of categories is 3
    if not ce.check_child_number(Trimmomatic, 3):
        sys.exit("/!\\ The XML file must contain exactly 3 categories for "
                 "Trimmomatic!")

    unrecognized = ("/!\\ Atleast one category haven't been recognized.\n"
                    "Please, have a look at the name of Trimmomatic "
                    "categories, atleast one of them have been modified.")

    # one slot per expected category, filled while walking the children
    categories = {
        'adapter-trimming': None,
        'quality-trimming': None,
        'useful-parameters': None,
    }

    for category in Trimmomatic:
        name = category.get('name')
        if name not in categories:
            sys.exit(unrecognized)
        categories[name] = category

    # Three children with a repeated name leave one slot empty; report it
    # instead of failing later on an unassigned variable.
    # 'is None' is required: an Element without children is falsy.
    if any(found is None for found in categories.values()):
        sys.exit(unrecognized)

    return (categories['adapter-trimming'],
            categories['quality-trimming'],
            categories['useful-parameters'])
Пример #3
0
def separate_steps(root):
    """
    Split the root into its steps (inputs/outputs and the two programs).

    Takes one argument : root [ElementTree] : the root of the tree

    Returns three values, in this order :
        - Puts [ElementTree] : child whose tag is 'input-output'
        - Fastqc [ElementTree] : child whose 'name' attribute is 'fastqc'
        - Trimmomatic [ElementTree] : child whose 'name' attribute is
          'trimmomatic'

    Exits the program (sys.exit) when root does not have 3 children, when a
    child is not recognized, or when one of the three expected sections is
    missing (e.g. because another one appears twice).
    """
    structure_error = ("/!\\ Warning : The XML file must contain exactly one "
                       "Input and Output section and two programs "
                       "(FastQC & Trimmomatic)")

    # check that the root have 3 child
    if not ce.check_child_number(root, 3):
        sys.exit(structure_error)

    puts = fastqc = trimmomatic = None

    # separate the different steps; the tag is tested before the 'name'
    # attribute, exactly as sections are told apart in the XML layout
    for element in root:

        if element.tag == 'input-output':
            puts = element

        elif element.get('name') == 'fastqc':
            fastqc = element

        elif element.get('name') == 'trimmomatic':
            trimmomatic = element

        else:
            sys.exit("/!\\ Oops! Atleast the name of the section "
                     "'input-output' or one of program names (fastqc of "
                     "trimmomatic) have been modified")

    # A duplicated section passes the count check but leaves another one
    # unset; stop with the structure message rather than crash on return.
    # 'is None' is required: an Element without children is falsy.
    if puts is None or fastqc is None or trimmomatic is None:
        sys.exit(structure_error)

    return puts, fastqc, trimmomatic
Пример #4
0
def get_useful_parameters(Useful, param) :
    """
    Function that gets useful parameters.

    Takes two arguments:
        - Useful [ElementTree] : subtree which contains useful parameters
        - param [dict] : dictionary containing all parameters

    Returns param [dict] with the useful parameters added :
        - 'keep_singleton' : 'yes' when singleton reads must be shown
        - 'tophred33' / 'tophred64' : set when a phred conversion is asked
        - 'threads' : value returned by ce.check_integer for the threads
        - 'compress' : '.gz' or '.bz2' when compressed output is asked

    Exits the program (sys.exit) when the section does not hold 4 children,
    when a parameter name is not recognized, when the phred format is
    neither 33 nor 64, or when the compression format is empty or is not
    gz / bz2.
    """

    # check that useful parameter have 4 child
    if not ce.check_child_number(Useful, 4):
        sys.exit("/!\\ The XML file must contain exactly 4 useful parameters.")

    for parameter in Useful.findall('parameter'):
        name = parameter.get('name')

        # SINGLETON-READS ------------------------------------------------------
        if name == 'singleton-reads':

            show = ce.check_yes_no(parameter.find('show').text,
                               'show in singleton-reads from useful-parameters')

            if show == 'yes':
                param['keep_singleton'] = 'yes'

        # CONVERT-TO-PHRED -----------------------------------------------------
        elif name == 'convert-to-phred':

            skip = ce.check_yes_no(parameter.find('skip').text,
                              'skip in convert-to-phred from useful parameters')

            if skip == 'no':
                format_num = ce.check_integer(parameter.find('format').text,
                             'format in convert-to-phred in useful parameters.')

                # only phred33 and phred64 are valid conversion targets
                if format_num == 33:
                    param['tophred33'] = 'TOPHRED33'
                elif format_num == 64:
                    param['tophred64'] = "TOPHRED64"
                else:
                    sys.exit("/!\\ Quality score can only be converted to "
                             "phred33 or phred64.")

        # THREADS --------------------------------------------------------------
        elif name == 'threads':

            param['threads'] = ce.check_integer(
                                      parameter.find('number').text,
                                      'number in threads in useful parameters.')

        # COMPRESSION ----------------------------------------------------------
        elif name == 'compressed-output':

            skip = ce.check_yes_no(parameter.find('skip').text,
                            'skip in compressed-output from useful parameters.')

            if skip == 'no':

                # '.text' is None for an element without text
                raw_format = parameter.find('format').text

                if raw_format is None or not raw_format.strip():
                    sys.exit("/!\\ You haven't enter a text for format in "
                             "compressed-output in useful parameters.")

                extension = raw_format.strip().lower()

                # The error message asks for 'bz2' / 'gz' while the stored
                # value carries a leading dot: accept both spellings and
                # always store the dotted form.
                if not extension.startswith('.'):
                    extension = '.' + extension

                # only reject here; a valid format must not reach sys.exit
                if extension not in ('.bz2', '.gz'):
                    sys.exit("/!\\ Value for format in compressed-output in "
                             "useful parameters can only be 'bz2' or 'gz'.")

                param['compress'] = extension

        else:
            sys.exit("You have modified a useful parameter name or enter a "
                     "new one which have not been recognized\n")

    return param
Пример #5
0
def get_quality_parameters(Quality, param) :
    """
    Collect the quality trimming parameters of Trimmomatic.

    Takes 2 arguments:
        - Quality [ElementTree] : subtree which contains quality trimming
          parameters
        - param [dict] : dictionary containing all parameters

    Returns param [dict]. When the section skip option is 'no', the key
    'quality' is set to 'yes' and every parameter whose own skip option is
    'no' is added under one of the keys 'slidingwindow', 'maxinfo',
    'leading', 'trailing', 'crop', 'headcrop', 'minlen' or 'avgqual'.

    Exits the program (sys.exit) when the section does not hold 9 children
    or when a non-skipped parameter has an unknown name.
    """

    # the section must hold 9 children
    if not ce.check_child_number(Quality, 9):
        sys.exit("/!\\ The XML file must contain exactly one skip option "
                 "skip and 8 parameters for quality trimming.")

    # SKIP ---------------------------------------------------------------------

    section_skip = ce.check_yes_no(Quality.find('skip').text,
                                   'skip in quality trimming')

    # nothing to collect when the whole section is skipped
    if section_skip != 'no':
        return param

    # PARAMETERS ---------------------------------------------------------------

    param['quality'] = 'yes'

    # parameters made of a single integer :
    # name -> (child tag to read, label given to ce.check_integer, dict key)
    single_integer = {
        'leading': ('required-quality',
                    "required-quality in 'leading' quality trimming",
                    'leading'),
        'trailing': ('required-quality',
                     "required-quality in 'trailing' quality trimming",
                     'trailing'),
        'crop': ('length',
                 "length in 'crop' quality trimming",
                 'crop'),
        'headcrop': ('length',
                     "length in 'headcrop' quality trimming",
                     'headcrop'),
        'minlen': ('length',
                   "length in 'minlen' quality trimming",
                   'minlen'),
        'average-quality': ('required-quality',
                            "required-quality in 'average quality' trimming",
                            'avgqual'),
    }

    for entry in Quality.findall('parameter'):
        entry_name = entry.get('name')

        # each parameter carries its own skip option
        entry_skip = ce.check_yes_no(entry.find('skip').text,
                             'skip in %s from quality trimming' % entry_name)

        if entry_skip != 'no':
            continue

        if entry_name == 'sliding-window':
            # stored as '<window length>:<required quality>'
            window = ce.check_integer(entry.find('window-length').text,
                                     'window length in sliding window trimming')
            quality = ce.check_integer(entry.find('required-quality').text,
                                 'required-quality for sliding window trimming')
            param['slidingwindow'] = '{0}:{1}'.format(window, quality)

        elif entry_name == 'maxinfo':
            # stored as '<target length>:<strictness>'
            target = ce.check_integer(entry.find('target-length').text,
                                    'target-length in maxinfo quality trimming')
            strictness = ce.check_float(entry.find('strictness').text,
                                       'strictness in maxinfo quality trimming')
            param['maxinfo'] = '{0}:{1}'.format(target, strictness)

        elif entry_name in single_integer:
            child_tag, label, key = single_integer[entry_name]
            param[key] = ce.check_integer(entry.find(child_tag).text, label)

        else:
            sys.exit("/!\\ You have modified a quality trimming parameter"
                     " name or enter a new one which have not been recognized")

    return param
Пример #6
0
def get_adapter_parameters(Adapter, param):
    """
    Function that gets adapter trimming parameters

    Takes two arguments:
        - Adapter [ElementTree] : subtree which contains adapter trimming
          parameters
        - param [dict] : dictionary containing all parameters

    Returns param [dict]. When the skip option is 'no', the key
    'illuminaclip' is set to the colon-separated string
    '<fasta file>:<seed mismatches>:<palindrome clip threshold>:
    <simple clip threshold>:<min adapter length>:<keep both reads>'.

    Exits the program (sys.exit) when the section does not hold exactly a
    'skip' element and a 'parameter' element, or when the parameter is not
    named 'illuminaclip'.
    """
    structure_error = ("/!\\ Warning : The XML file must contain exactly 1 "
                       "skip option and 1 parameter for adapter trimming.")

    # check adapter section have 2 child (skip and parameters)
    if not ce.check_child_number(Adapter, 2):
        sys.exit(structure_error)

    # SKIP ---------------------------------------------------------------------

    # The count check does not look at tags: find() returns None when the
    # 'skip' element is absent, which would crash on '.text'
    skip_node = Adapter.find('skip')
    if skip_node is None:
        sys.exit(structure_error)

    skip = ce.check_yes_no(skip_node.text, 'skip in adapter trimming')

    # nothing to collect when adapter trimming is skipped
    if skip != 'no':
        return param

    # PARAMETERS ---------------------------------------------------------------

    Clip = Adapter.find('parameter')

    # same reasoning as for 'skip': the element itself may be missing
    if Clip is None:
        sys.exit(structure_error)

    if Clip.get('name') != 'illuminaclip':
        sys.exit("/!\\ Name of parameter 'illuminaclip' have been modified or"
                 " replaced by something else. Please rename it 'illuminaclip'")

    # obligatory parameters ----------------------------------------------------

    fasta_file = ce.check_fasta_file(Clip.find('adapters-fasta-file').text)

    mismatches = ce.check_integer(Clip.find('seed-mismatches').text,
                                  'mismatches in illuminaclip')

    P_thres = ce.check_integer(Clip.find('palindrome-clip-threshold').text,
                               'palindrome clip threshold in illuminaclip')

    S_thres = ce.check_integer(Clip.find('simple-clip-threshold').text,
                               'simple clip threshold in illuminaclip')

    # optional parameters ------------------------------------------------------

    # min-adapter-length : read the value unless it is skipped
    minlen = Clip.find('min-adapter-length')
    minlen_skip = ce.check_yes_no(minlen.find('skip').text,
                                  'min-adapter-length skip in adapter trimming')

    if minlen_skip == 'no':
        min_length = ce.check_integer(minlen.find('value').text,
                                      'min-adapter-length in adapter trimming')
    else:
        min_length = 8 # default value

    # keep-both-reads : read the value unless it is skipped
    keepreads = Clip.find('keep-both-reads')
    keepreads_skip = ce.check_yes_no(keepreads.find('skip').text,
                                     'keep-both-reads skip in adapter trimming')

    if keepreads_skip == 'no':
        keep = ce.check_true_false(keepreads.find('value').text,
                                   'keep-both-reads in adapter trimming')
    else:
        # NOTE(review): this is the default chosen by this script; the
        # Trimmomatic manual appears to describe keepBothReads as off by
        # default - confirm that 'true' is intended here.
        keep = 'true' # default value

    # Add all parameters to dictionary -----------------------------------------

    param['illuminaclip'] = "{0}:{1}:{2}:{3}:{4}:{5}".format(
        fasta_file, mismatches, P_thres, S_thres, min_length, keep)

    return param