示例#1
0
文件: cinputs.py 项目: GGFHF/TOA
def input_float(text,
                default=None,
                minimum=float(-sys.maxsize - 1),
                maximum=float(sys.maxsize),
                mne=0.0,
                mxe=0.0):
    '''
    Ask the user for a float number until a valid one is typed.

    The prompt shows "text" and, when "default" is not None, the default
    value between square brackets; an empty answer then selects the default.
    Every answer is validated with xlib.check_float, which receives
    "minimum", "maximum", "mne" and "mxe" unchanged. An invalid answer is
    reported on the console and the question is asked again.

    Returns the accepted answer converted to float.
    '''

    # the prompt only depends on the arguments, so it is built once
    if default is None:
        prompt = f'{text}: '
    else:
        prompt = f'{text} [{default}]: '

    # keep asking until the answer passes the validation
    while True:
        answer = input(prompt)

        # an empty answer means "use the default" when there is one
        if answer == '' and default is not None:
            answer = default

        # a valid answer ends the dialogue
        if xlib.check_float(answer, minimum, maximum, mne, mxe):
            return float(answer)

        print(f'*** ERROR: {answer} is not a valid value.')
示例#2
0
def check_args(args):
    '''
    Verify the input arguments data.

    Every argument held by "args" is checked in turn and each problem found
    is reported with xlib.Message.print('error', ...). Optional arguments
    that are None receive their default value from xlib.Const, and valid
    numeric arguments are converted in place to int or float. The directory
    of the output file is created when it does not exist yet.

    Raises xlib.ProgramException('', 'P001') if any check has failed.
    '''

    # initialize the control variable
    OK = True

    # check the assembly_software_code value
    if args.assembly_software_code is None:
        xlib.Message.print(
            'error',
            '*** The assembly software that generated the transcritpme file is not indicated in the input arguments.'
        )
        OK = False
    elif args.assembly_software_code not in [
            xlib.Const.AS_TRINITY_CODE, xlib.Const.AS_SOAPDENOVOTRANS_CODE,
            xlib.Const.AS_GENERATED_BY_NGSCLOUD
    ]:
        xlib.Message.print(
            'error',
            f'*** {args.assembly_software_code} is not a valid code of assembly software.'
        )
        OK = False

    # check the transcriptome_file value
    if args.transcriptome_file is None:
        xlib.Message.print(
            'error',
            '*** A transcritpme file in Fasta format is not indicated in the input arguments.'
        )
        OK = False
    elif not os.path.isfile(args.transcriptome_file):
        xlib.Message.print(
            'error', f'*** The file {args.transcriptome_file} does not exist.')
        OK = False

    # check the score_file value
    if args.score_file is None:
        xlib.Message.print(
            'error',
            '*** A score file where RSEM-EVAL (DETONATE package) saved the score of the transcriptome file is not indicated in the input arguments.'
        )
        OK = False
    elif not os.path.isfile(args.score_file):
        xlib.Message.print('error',
                           f'*** The file {args.score_file} does not exist.')
        OK = False

    # check the output_file value
    if args.output_file is None:
        xlib.Message.print(
            'error',
            '*** A output file where filtered transcripts will be saved is not indicated in the input arguments.'
        )
        OK = False
    else:
        try:
            # a bare file name has an empty directory part: it refers to the
            # current directory and there is nothing to create in that case
            # (os.makedirs('') would raise and wrongly reject the file name)
            output_dir = os.path.dirname(args.output_file)
            if output_dir and not os.path.exists(output_dir):
                os.makedirs(output_dir)
        except (OSError, TypeError, ValueError):
            xlib.Message.print(
                'error',
                f'*** The directory {os.path.dirname(args.output_file)} of the file {args.output_file} is not valid.'
            )
            OK = False

    # check the minlen value
    if args.minlen is None:
        args.minlen = xlib.Const.DEFAULT_MINLEN
    elif not xlib.check_int(args.minlen, minimum=1):
        xlib.Message.print(
            'error',
            '*** The minlen has to be a integer number greater than 0.')
        OK = False
    else:
        args.minlen = int(args.minlen)

    # check the maxlen value
    if args.maxlen is None:
        args.maxlen = xlib.Const.DEFAULT_MAXLEN
    elif not xlib.check_int(args.maxlen, minimum=1):
        xlib.Message.print(
            'error',
            '*** The maxlen has to be a integer number greater than 0.')
        OK = False
    else:
        args.maxlen = int(args.maxlen)

    # check the minFPKM value
    # (reported through xlib.Message like every other error of this function)
    if args.minFPKM is None:
        args.minFPKM = xlib.Const.DEFAULT_MINFPKM
    elif not xlib.check_float(args.minFPKM, minimum=0.0):
        xlib.Message.print(
            'error',
            '*** FPKM has to be a float number greater than or equal to 0.0.')
        OK = False
    else:
        args.minFPKM = float(args.minFPKM)

    # check the minTPM value
    # (the message names TPM, the argument that is actually being checked)
    if args.minTPM is None:
        args.minTPM = xlib.Const.DEFAULT_MINTPM
    elif not xlib.check_float(args.minTPM, minimum=0.0):
        xlib.Message.print(
            'error',
            '*** TPM has to be a float number greater than or equal to 0.0.')
        OK = False
    else:
        args.minTPM = float(args.minTPM)

    # check "verbose"
    if args.verbose is None:
        args.verbose = xlib.Const.DEFAULT_VERBOSE
    elif not xlib.check_code(
            args.verbose, xlib.get_verbose_code_list(), case_sensitive=False):
        xlib.Message.print(
            'error',
            f'*** verbose has to be {xlib.get_verbose_code_list_text()}.')
        OK = False
    if args.verbose.upper() == 'Y':
        xlib.Message.set_verbose_status(True)

    # check "trace"
    if args.trace is None:
        args.trace = xlib.Const.DEFAULT_TRACE
    elif not xlib.check_code(
            args.trace, xlib.get_trace_code_list(), case_sensitive=False):
        xlib.Message.print(
            'error', f'*** trace has to be {xlib.get_trace_code_list_text()}.')
        OK = False
    if args.trace.upper() == 'Y':
        xlib.Message.set_trace_status(True)

    # check if maxlen value is greater or equal than minlen value
    # (only when everything is valid, i.e. both values are already numbers)
    if OK:
        if args.maxlen < args.minlen:
            xlib.Message.print(
                'error',
                '*** The maxlen value has to be greater than or equal to minlen.'
            )
            OK = False

    # if there are errors, exit with exception
    if not OK:
        raise xlib.ProgramException('', 'P001')
示例#3
0
文件: xbusco.py 项目: GGFHF/NGScloud2
def check_busco_config_file(strict):
    '''
    Check the BUSCO config file of a run.

    The option dictionary is built with xlib.get_option_dict from the file
    returned by get_busco_config_file(), and the sections "identification"
    and "BUSCO parameters" are validated key by key. The key
    "lineage_data_url" is validated by opening the URL.

    The parameter "strict" is not used by this function.

    Returns a tuple (OK, error_list): OK is False if any check has failed and
    error_list holds the error messages in the order they were detected.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # initialize variable used when value is not found
    not_found = '***NOTFOUND***'.upper()

    # get the option dictionary
    try:
        busco_option_dict = xlib.get_option_dict(get_busco_config_file())
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(
            '*** ERROR: The option dictionary could not be built from the config file'
        )
        OK = False
    else:

        # get the sections list
        sections_list = sorted(busco_option_dict.keys())

        # check section "identification"
        if 'identification' not in sections_list:
            error_list.append(
                '*** ERROR: the section "identification" is not found.')
            OK = False
        else:

            # check section "identification" - key "experiment_id"
            experiment_id = busco_option_dict.get('identification', {}).get(
                'experiment_id', not_found)
            if experiment_id == not_found:
                error_list.append(
                    '*** ERROR: the key "experiment_id" is not found in the section "identification".'
                )
                OK = False

            # check section "identification" - key "assembly_software"
            assembly_software = busco_option_dict.get(
                'identification', {}).get('assembly_software', not_found)
            if assembly_software == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_software" is not found in the section "identification".'
                )
                OK = False
            elif not xlib.check_code(assembly_software,
                                     get_assembly_software_code_list(),
                                     case_sensitive=False):
                error_list.append(
                    f'*** ERROR: the key "assembly_software" has to be {get_assembly_software_code_list_text()}.'
                )
                OK = False

            # check section "identification" - key "assembly_dataset_id"
            assembly_dataset_id = busco_option_dict.get(
                'identification', {}).get('assembly_dataset_id', not_found)
            if assembly_dataset_id == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_dataset_id" is not found in the section "identification".'
                )
                OK = False
            elif not xlib.check_startswith(assembly_dataset_id,
                                           get_assembly_software_code_list(),
                                           case_sensitive=True):
                error_list.append(
                    f'*** ERROR: the key "assembly_dataset_id" has to start with {get_assembly_software_code_list_text()}.'
                )
                OK = False

            # check section "identification" - key "assembly_type"
            # (CONTIGS/SCAFFOLDS when the dataset identification starts with
            # the SOAPdenovo-Trans code, NONE otherwise)
            assembly_type = busco_option_dict.get('identification', {}).get(
                'assembly_type', not_found)
            if assembly_type == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_type" is not found in the section "identification".'
                )
                OK = False
            elif assembly_dataset_id.startswith(xlib.get_soapdenovotrans_code()) and assembly_type.upper() not in ['CONTIGS', 'SCAFFOLDS'] or \
                not assembly_dataset_id.startswith(xlib.get_soapdenovotrans_code()) and assembly_type.upper() != 'NONE':
                error_list.append(
                    f'*** ERROR: the key "assembly_type" has to be CONTIGS or SCAFFOLDS in {xlib.get_soapdenovotrans_name()} or NONE in any other case.'
                )
                OK = False

        # check section "BUSCO parameters"
        if 'BUSCO parameters' not in sections_list:
            error_list.append(
                '*** ERROR: the section "BUSCO parameters" is not found.')
            OK = False
        else:

            # check section "BUSCO parameters" - key "ncpu"
            ncpu = busco_option_dict.get('BUSCO parameters',
                                         {}).get('ncpu', not_found)
            if ncpu == not_found:
                error_list.append(
                    '*** ERROR: the key "ncpu" is not found in the section "BUSCO parameters".'
                )
                OK = False
            elif not xlib.check_int(ncpu, minimum=1):
                error_list.append(
                    '*** ERROR: the key "ncpu" has to be an integer number greater than or equal to 1.'
                )
                OK = False

            # check section "BUSCO parameters" - key "lineage_data_url"
            lineage_data_url = busco_option_dict.get(
                'BUSCO parameters', {}).get('lineage_data_url', not_found)
            if lineage_data_url == not_found:
                error_list.append(
                    '*** ERROR: the key "lineage_data_url" is not found in the section "BUSCO parameters"'
                )
                OK = False
            else:
                try:
                    # the response is only opened to prove that the address
                    # is reachable; the "with" closes the connection at once
                    with urllib.request.urlopen(lineage_data_url):
                        pass
                except Exception as e:
                    error_list.append(f'*** EXCEPTION: "{e}".')
                    error_list.append(
                        '*** ERROR: the key "lineage_data_url" has to be a reachable address.'
                    )
                    OK = False

            # check section "BUSCO parameters" - key "mode"
            mode = busco_option_dict.get('BUSCO parameters',
                                         {}).get('mode', not_found)
            if mode == not_found:
                error_list.append(
                    '*** ERROR: the key "mode" is not found in the section "BUSCO parameters".'
                )
                OK = False
            elif not xlib.check_code(
                    mode, get_mode_code_list(), case_sensitive=False):
                error_list.append(
                    f'*** ERROR: the key "mode" has to be {get_mode_code_list_text()}.'
                )
                OK = False

            # check section "BUSCO parameters" - key "evalue"
            evalue = busco_option_dict.get('BUSCO parameters',
                                           {}).get('evalue', not_found)
            if evalue == not_found:
                error_list.append(
                    '*** ERROR: the key "evalue" is not found in the section "BUSCO parameters".'
                )
                OK = False
            elif not xlib.check_float(evalue, minimum=0., mne=1E-12):
                error_list.append(
                    '*** ERROR: the key "evalue" has to be a float number greater than 0.'
                )
                OK = False

            # check section "BUSCO parameters" - key "limit"
            limit = busco_option_dict.get('BUSCO parameters',
                                          {}).get('limit', not_found)
            if limit == not_found:
                error_list.append(
                    '*** ERROR: the key "limit" is not found in the section "BUSCO parameters".'
                )
                OK = False
            elif not xlib.check_int(limit, minimum=1):
                error_list.append(
                    '*** ERROR: the key "limit" has to be an integer number greater than or equal to 1.'
                )
                OK = False

            # check section "BUSCO parameters" - key "species"
            species = busco_option_dict.get('BUSCO parameters',
                                            {}).get('species', not_found)
            if species == not_found:
                error_list.append(
                    '*** ERROR: the key "species" is not found in the section "BUSCO parameters"'
                )
                OK = False

            # check section "BUSCO parameters" - key "long"
            long = busco_option_dict.get('BUSCO parameters',
                                         {}).get('long', not_found)
            if long == not_found:
                error_list.append(
                    '*** ERROR: the key "long" is not found in the section "BUSCO parameters".'
                )
                OK = False
            elif not xlib.check_code(
                    long, get_long_code_list(), case_sensitive=False):
                error_list.append(
                    f'*** ERROR: the key "long" has to be {get_long_code_list_text()}.'
                )
                OK = False

            # check section "BUSCO parameters" - key "augustus_options"
            augustus_options = busco_option_dict.get(
                'BUSCO parameters', {}).get('augustus_options', not_found)
            if augustus_options == not_found:
                error_list.append(
                    '*** ERROR: the key "augustus_options" is not found in the section "BUSCO parameters".'
                )
                OK = False
            elif augustus_options.upper() != 'NONE':
                # the result is kept in its own variable: assigning it to OK
                # would erase the failures detected by the previous checks
                (OK2, error_list2) = xlib.check_parameter_list(
                    augustus_options, "augustus_options", [])
                error_list = error_list + error_list2
                if not OK2:
                    OK = False

    # warn that the results config file is not valid if there are any errors
    if not OK:
        error_list.append(
            f'\nThe {xlib.get_busco_name()} config file is not valid. Please, correct this file or recreate it.'
        )

    # return the control variable and the error list
    return (OK, error_list)
示例#4
0
def check_args(args):
    '''
    Validate the input arguments and convert the numeric ones in place.

    Each argument held by "args" is checked in turn and every problem found
    is reported with xlib.Message.print('error', ...). Valid numeric
    arguments are replaced by their int or float value. "verbose" and
    "trace" receive their default value from xlib.Const when they are None,
    and the corresponding xlib.Message status is switched on when the value
    is "Y" (in any case).

    Raises xlib.ProgramException('', 'P001') if any check has failed.
    '''

    def reject(message):
        # report the problem and hand back the new value of the control flag
        xlib.Message.print('error', message)
        return False

    # control flag: it stays True while every argument is acceptable
    is_valid = True

    # "fasta_file": mandatory, has to be an existing file
    if args.fasta_file is None:
        is_valid = reject(
            '*** The input FASTA file is not indicated in the input arguments.'
        )
    elif not os.path.isfile(args.fasta_file):
        is_valid = reject(f'*** The file {args.fasta_file} does not exist.')

    # "output_file": mandatory
    if args.output_file is None:
        is_valid = reject(
            '*** The FASTA file with debased sequences is not indicated in the input arguments.'
        )

    # "fragmentation_probability": mandatory float inside the allowed range
    if args.fragmentation_probability is None:
        is_valid = reject(
            '*** The fragmentation probability is not indicated in the input arguments.'
        )
    elif xlib.check_float(args.fragmentation_probability,
                          minimum=xlib.Const.FRAGPROB_LOWEST,
                          maximum=xlib.Const.FRAGPROB_UPPEST):
        args.fragmentation_probability = float(args.fragmentation_probability)
    else:
        is_valid = reject(
            f'The fragmentation probability has to be a float number between {xlib.Const.FRAGPROB_LOWEST} and {xlib.Const.FRAGPROB_UPPEST}.'
        )

    # "max_fragment_number": mandatory integer inside the allowed range
    if args.max_fragment_number is None:
        is_valid = reject(
            '*** The maximum fragment number is not indicated in the input arguments.'
        )
    elif xlib.check_int(args.max_fragment_number,
                        minimum=xlib.Const.MAXFRAGNUM_LOWEST,
                        maximum=xlib.Const.MAXFRAGNUM_UPPEST):
        args.max_fragment_number = int(args.max_fragment_number)
    else:
        is_valid = reject(
            f'The maximum fragment number has to be a integer number between {xlib.Const.MAXFRAGNUM_LOWEST} and {xlib.Const.MAXFRAGNUM_UPPEST}.'
        )

    # "max_end_shortening": mandatory integer inside the allowed range
    if args.max_end_shortening is None:
        is_valid = reject(
            '*** The maximum shortening of a fragment end is not indicated in the input arguments.'
        )
    elif xlib.check_int(args.max_end_shortening,
                        minimum=xlib.Const.MAXSHORTENING_LOWEST,
                        maximum=xlib.Const.MAXSHORTENING_UPPEST):
        args.max_end_shortening = int(args.max_end_shortening)
    else:
        is_valid = reject(
            f'The maximum shortening of a fragment end has to be a integer number between {xlib.Const.MAXSHORTENING_LOWEST} and {xlib.Const.MAXSHORTENING_UPPEST}.'
        )

    # "min_fragment_length": mandatory integer, at least 1
    if args.min_fragment_length is None:
        is_valid = reject(
            '*** The minimum fragment length is not indicated in the input arguments.'
        )
    elif xlib.check_int(args.min_fragment_length, minimum=1):
        args.min_fragment_length = int(args.min_fragment_length)
    else:
        is_valid = reject(
            'The minimum fragment length has to be a integer number greater than 0.'
        )

    # "mutation_probability": mandatory float inside the allowed range
    if args.mutation_probability is None:
        is_valid = reject(
            '*** The mutation probability is not indicated in the input arguments.'
        )
    elif xlib.check_float(args.mutation_probability,
                          minimum=xlib.Const.MUTPROB_LOWEST,
                          maximum=xlib.Const.MUTPROB_UPPEST):
        args.mutation_probability = float(args.mutation_probability)
    else:
        is_valid = reject(
            f'The mutation probability has to be a float number between {xlib.Const.MUTPROB_LOWEST} and {xlib.Const.MUTPROB_UPPEST}'
        )

    # "max_mutation_number": mandatory integer inside the allowed range
    if args.max_mutation_number is None:
        is_valid = reject(
            '*** The maximum mutation number is not indicated in the input arguments.'
        )
    elif xlib.check_int(args.max_mutation_number,
                        minimum=xlib.Const.MAXMUTNUM_LOWEST,
                        maximum=xlib.Const.MAXMUTNUM_UPPEST):
        args.max_mutation_number = int(args.max_mutation_number)
    else:
        is_valid = reject(
            f'The maximum mutation number has to be a integer number between {xlib.Const.MAXMUTNUM_LOWEST} and {xlib.Const.MAXMUTNUM_UPPEST}.'
        )

    # "indel_probability": mandatory float inside the allowed range
    if args.indel_probability is None:
        is_valid = reject(
            '*** The insertion/deletion probability is not indicated in the input arguments.'
        )
    elif xlib.check_float(args.indel_probability,
                          minimum=xlib.Const.INDELPROB_LOWEST,
                          maximum=xlib.Const.INDELPROB_UPPEST):
        args.indel_probability = float(args.indel_probability)
    else:
        is_valid = reject(
            f'The insertion/deletion probability has to be a float number between {xlib.Const.INDELPROB_LOWEST} and {xlib.Const.INDELPROB_UPPEST}.'
        )

    # "max_mutation_size": mandatory integer inside the allowed range
    if args.max_mutation_size is None:
        is_valid = reject(
            '*** The maximum mutation size size is not indicated in the input arguments.'
        )
    elif xlib.check_int(args.max_mutation_size,
                        minimum=xlib.Const.MAXMUTSIZE_LOWEST,
                        maximum=xlib.Const.MAXMUTSIZE_UPPEST):
        args.max_mutation_size = int(args.max_mutation_size)
    else:
        is_valid = reject(
            f'The maximum mutation size size has to be a integer number between {xlib.Const.MAXMUTSIZE_LOWEST} and {xlib.Const.MAXMUTSIZE_UPPEST}.'
        )

    # "verbose": optional code; the status is switched on when it is "Y"
    if args.verbose is None:
        args.verbose = xlib.Const.DEFAULT_VERBOSE
    elif args.verbose.upper() not in get_verbose_code_list():
        is_valid = reject(
            f'The verbose has to be {get_verbose_code_list_text()}.')
    if args.verbose.upper() == 'Y':
        xlib.Message.set_verbose_status(True)

    # "trace": optional code; the status is switched on when it is "Y"
    if args.trace is None:
        args.trace = xlib.Const.DEFAULT_TRACE
    elif args.trace.upper() not in get_trace_code_list():
        is_valid = reject(f'The trace has to be {get_trace_code_list_text()}.')
    if args.trace.upper() == 'Y':
        xlib.Message.set_trace_status(True)

    # any failed check aborts the program with an exception
    if not is_valid:
        raise xlib.ProgramException('', 'P001')
示例#5
0
def check_cd_hit_est_config_file(strict):
    '''
    Check the CD-HIT-EST config file of a run.

    The option dictionary is built with xlib.get_option_dict from the file
    returned by get_cd_hit_est_config_file(), and the sections
    "identification" and "CD-HIT-EST parameters" are validated key by key.

    The parameter "strict" is not used by this function.

    Returns a tuple (OK, error_list): OK is False if any check has failed and
    error_list holds the error messages in the order they were detected.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # initialize variable used when value is not found
    not_found = '***NOTFOUND***'.upper()

    # get the option dictionary
    try:
        cd_hit_est_option_dict = xlib.get_option_dict(
            get_cd_hit_est_config_file())
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(
            '*** ERROR: The option dictionary could not be built from the config file'
        )
        OK = False
    else:

        # get the sections list
        sections_list = sorted(cd_hit_est_option_dict.keys())

        # check section "identification"
        if 'identification' not in sections_list:
            error_list.append(
                '*** ERROR: the section "identification" is not found.')
            OK = False
        else:

            # check section "identification" - key "experiment_id"
            experiment_id = cd_hit_est_option_dict.get(
                'identification', {}).get('experiment_id', not_found)
            if experiment_id == not_found:
                error_list.append(
                    '*** ERROR: the key "experiment_id" is not found in the section "identification".'
                )
                OK = False

            # check section "identification" - key "assembly_software"
            assembly_software = cd_hit_est_option_dict.get(
                'identification', {}).get('assembly_software', not_found)
            if assembly_software == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_software" is not found in the section "identification".'
                )
                OK = False
            elif not xlib.check_code(assembly_software,
                                     get_assembly_software_code_list(),
                                     case_sensitive=False):
                error_list.append(
                    f'*** ERROR: the key "assembly_software" has to be {get_assembly_software_code_list_text()}.'
                )
                OK = False

            # check section "identification" - key "assembly_dataset_id"
            assembly_dataset_id = cd_hit_est_option_dict.get(
                'identification', {}).get('assembly_dataset_id', not_found)
            if assembly_dataset_id == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_dataset_id" is not found in the section "identification".'
                )
                OK = False
            elif not xlib.check_startswith(assembly_dataset_id,
                                           get_assembly_software_code_list(),
                                           case_sensitive=True):
                error_list.append(
                    f'*** ERROR: the key "assembly_dataset_id" has to start with {get_assembly_software_code_list_text()}.'
                )
                OK = False

            # check section "identification" - key "assembly_type"
            # (CONTIGS/SCAFFOLDS when the dataset identification starts with
            # the SOAPdenovo-Trans code, NONE otherwise)
            assembly_type = cd_hit_est_option_dict.get(
                'identification', {}).get('assembly_type', not_found)
            if assembly_type == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_type" is not found in the section "identification".'
                )
                OK = False
            elif assembly_dataset_id.startswith(xlib.get_soapdenovotrans_code()) and assembly_type.upper() not in ['CONTIGS', 'SCAFFOLDS'] or \
                not assembly_dataset_id.startswith(xlib.get_soapdenovotrans_code()) and assembly_type.upper() != 'NONE':
                error_list.append(
                    f'*** ERROR: the key "assembly_type" has to be CONTIGS or SCAFFOLDS in {xlib.get_soapdenovotrans_name()} or NONE in any other case.'
                )
                OK = False

        # check section "CD-HIT-EST parameters"
        if 'CD-HIT-EST parameters' not in sections_list:
            error_list.append(
                '*** ERROR: the section "CD-HIT-EST parameters" is not found.')
            OK = False
        else:

            # check section "CD-HIT-EST parameters" - key "threads"
            threads = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                                 {}).get('threads', not_found)
            if threads == not_found:
                error_list.append(
                    '*** ERROR: the key "threads" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif not xlib.check_int(threads, minimum=0):
                error_list.append(
                    '*** ERROR: the key "threads" has to be an integer number greater than or equal to 0.'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "memory_limit"
            memory_limit = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('memory_limit', not_found)
            if memory_limit == not_found:
                error_list.append(
                    '*** ERROR: the key "memory_limit" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif not xlib.check_int(memory_limit, minimum=0):
                error_list.append(
                    '*** ERROR: the key "memory_limit" has to be an integer number greater than or equal to 0.'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "seq_identity_threshold"
            seq_identity_threshold = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('seq_identity_threshold',
                                                 not_found)
            if seq_identity_threshold == not_found:
                error_list.append(
                    '*** ERROR: the key "seq_identity_threshold" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif not xlib.check_float(
                    seq_identity_threshold, minimum=0., maximum=1.):
                error_list.append(
                    '*** ERROR: the key "seq_identity_threshold" has to be a float number between 0.0 and 1.0.'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "word_length"
            word_length = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('word_length', not_found)
            if word_length == not_found:
                error_list.append(
                    '*** ERROR: the key "word_length" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif not xlib.check_int(word_length, minimum=1):
                error_list.append(
                    '*** ERROR: the key "word_length" has to be an integer number greater than or equal to 1.'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "mask"
            mask = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                              {}).get('mask', not_found)
            if mask == not_found:
                error_list.append(
                    '*** ERROR: the key "mask" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "match"
            match = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                               {}).get('match', not_found)
            if match == not_found:
                error_list.append(
                    '*** ERROR: the key "match" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif not xlib.check_int(match):
                error_list.append(
                    '*** ERROR: the key "match" has to be an integer number.')
                OK = False

            # check section "CD-HIT-EST parameters" - key "mismatch"
            mismatch = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                                  {}).get(
                                                      'mismatch', not_found)
            if mismatch == not_found:
                error_list.append(
                    '*** ERROR: the key "mismatch" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif not xlib.check_int(mismatch):
                error_list.append(
                    '*** ERROR: the key "mismatch" has to be an integer number.'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "other_parameters"
            # (the parameters in this list are passed to
            # xlib.check_parameter_list as the not allowed ones)
            not_allowed_parameters_list = [
                'T', 'M', 'c', 'n', 'mask', 'match', 'mismatch'
            ]
            other_parameters = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('other_parameters', not_found)
            if other_parameters == not_found:
                error_list.append(
                    '*** ERROR: the key "other_parameters" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            elif other_parameters.upper() != 'NONE':
                # the result is kept in its own variable: assigning it to OK
                # would erase the failures detected by the previous checks
                (OK2, error_list2) = xlib.check_parameter_list(
                    other_parameters, "other_parameters",
                    not_allowed_parameters_list)
                error_list = error_list + error_list2
                if not OK2:
                    OK = False

    # warn that the results config file is not valid if there are any errors
    if not OK:
        error_list.append(
            f'\nThe {xlib.get_cd_hit_est_name()} config file is not valid. Please, correct this file or recreate it.'
        )

    # return the control variable and the error list
    return (OK, error_list)
示例#6
0
def check_args(args):
    '''
    Validate the input arguments and normalize them in place.

    The arguments are checked in a fixed order and every problem is reported
    with xlib.Message.print('error', ...) as soon as it is found, so all the
    problems are listed in a single run. Valid values are rewritten on "args":
    "fix" is set to upper case, the percentages are converted to float,
    "min_depth" is converted to int, "tvi_list" becomes either an empty list or
    the result of xlib.split_literal_to_string_list, and the optional arguments
    that are None receive their default values.

    Raises xlib.ProgramException('', 'P001') if at least one check fails.
    '''

    # messages reported so far; an empty list means that no error was found
    reported = []

    def report(message):
        # print an error message and remember that the arguments are not valid
        xlib.Message.print('error', message)
        reported.append(message)

    def report_missing(description):
        # report a mandatory argument that was not supplied
        report(
            f'*** The {description} is not indicated in the input arguments.')

    def check_input_file(path, description):
        # the file has to be indicated and it has to exist
        if path is None:
            report_missing(description)
        elif not os.path.isfile(path):
            report(f'*** The file {path} does not exist.')

    def check_percentage(name, description):
        # the value has to be indicated and accepted by xlib.check_float with
        # the limits 0.0 and 100.0; when it is accepted, it is stored as a float
        value = getattr(args, name)
        if value is None:
            report_missing(description)
        elif xlib.check_float(value, minimum=0.0, maximum=100.0):
            setattr(args, name, float(value))
        else:
            report(
                f'The {description} has to be a float number between 0.0 and 100.0.'
            )

    # "input_vcf_file" and "sample_file"
    check_input_file(args.input_vcf_file, 'VCF file')
    check_input_file(args.sample_file, 'sample file')

    # "fix" (a valid code is kept in upper case)
    if args.fix is None:
        report('*** Fix is not indicated in the input arguments.')
    elif xlib.check_code(
            args.fix, xlib.get_fix_code_list(), case_sensitive=False):
        args.fix = args.fix.upper()
    else:
        report(f'*** fix has to be {xlib.get_fix_code_list_text()}.')

    # "scenario" (the value is validated but left as it was received)
    if args.scenario is None:
        report_missing('scenario')
    elif not xlib.check_code(args.scenario,
                             xlib.get_scenario_code_list(),
                             case_sensitive=False):
        report(
            f'*** The scenario has to be {xlib.get_scenario_code_list_text()}.'
        )

    # "min_aa_percentage" and "min_md_imputation_percentage"
    check_percentage('min_aa_percentage',
                     'minimum percent of alternative alleles per species')
    check_percentage(
        'min_md_imputation_percentage',
        'minimum percentage of missing data imputation to a new alternative allele per species'
    )

    # "imputed_md_id" (optional)
    if args.imputed_md_id is None:
        args.imputed_md_id = xlib.Const.DEFAULT_IMPUTED_MD_ID

    # "sp1_id" and "sp1_max_md_percentage"
    if args.sp1_id is None:
        report_missing('identification of the first species')
    check_percentage(
        'sp1_max_md_percentage',
        'maximum percentage of missing data of the first species')

    # "sp2_id" and "sp2_max_md_percentage"
    if args.sp2_id is None:
        report_missing('identification of the second species')
    check_percentage(
        'sp2_max_md_percentage',
        'maximum percentage of missing data of the second species')

    # "hybrid_id" (optional; the literal 'NONE' replaces a missing value)
    if args.hybrid_id is None:
        args.hybrid_id = 'NONE'

    # "min_afr_percentage"
    check_percentage('min_afr_percentage',
                     'minimum percentage of allele frequency per species')

    # "min_depth" (optional; a supplied value is stored as an integer)
    if args.min_depth is None:
        args.min_depth = xlib.Const.DEFAULT_MIN_DEPTH
    elif xlib.check_int(args.min_depth, minimum=1):
        args.min_depth = int(args.min_depth)
    else:
        report(
            'The minimum combined depth across samples has to be an integer number greater than  or equal to 1.'
        )

    # "output_vcf_file" (only its presence is checked)
    if args.output_vcf_file is None:
        report_missing('output VCF file')

    # "verbose" (optional); the status is switched on even when other
    # arguments have been rejected
    if args.verbose is None:
        args.verbose = xlib.Const.DEFAULT_VERBOSE
    elif not xlib.check_code(
            args.verbose, xlib.get_verbose_code_list(), case_sensitive=False):
        report(f'*** verbose has to be {xlib.get_verbose_code_list_text()}.')
    verbose_requested = args.verbose.upper() == 'Y'
    if verbose_requested:
        xlib.Message.set_verbose_status(True)

    # "trace" (optional); handled in the same way as "verbose"
    if args.trace is None:
        args.trace = xlib.Const.DEFAULT_TRACE
    elif not xlib.check_code(
            args.trace, xlib.get_trace_code_list(), case_sensitive=False):
        report(f'*** trace has to be {xlib.get_trace_code_list_text()}.')
    trace_requested = args.trace.upper() == 'Y'
    if trace_requested:
        xlib.Message.set_trace_status(True)

    # "tvi_list" (optional; None and the literal 'NONE' mean an empty list)
    if args.tvi_list is not None and args.tvi_list != 'NONE':
        args.tvi_list = xlib.split_literal_to_string_list(args.tvi_list)
    else:
        args.tvi_list = []

    # the identifications are compared only when every previous check passed
    # NOTE(review): "hybrid_id" was defaulted to 'NONE' above, so the None test
    # below is always true here and the literal 'NONE' is also compared with
    # the species identifications -- confirm whether this is intended
    if not reported:
        same_species = args.sp1_id == args.sp2_id
        hybrid_is_a_species = args.hybrid_id is not None and (
            args.sp1_id == args.hybrid_id or args.sp2_id == args.hybrid_id)
        if same_species or hybrid_is_a_species:
            report('The identifications must be different.')

    # if any error was reported, exit with exception
    if reported:
        raise xlib.ProgramException('', 'P001')