示例#1
0
def compute_first_phase() -> Phase:
    """ Parse only the part of the command line needed to know whether the user asked to start the software from the
    beginning or to resume from a specific phase. Return that phase.
    """
    env_file_path = os.path.join(os.getcwd(), _get_main_dir_name(),
                                 ENVIRONMENT_FILE_NAME)
    resume_requested = _resume_phase_asked()
    env_file_found = _env_file_exists(env_file_path)

    if env_file_found and resume_requested:
        # The user wants to resume where the previous run stopped.
        load_environment_file(path=env_file_path)
        requested_phase = str_to_phase(
            sys.argv[sys.argv.index(gpn.resume_phase()) + 1])

        # Refuse to resume if the data required by the requested phase has
        # not been produced by a previous run.
        if not phase_processable(phase_to_compute=requested_phase,
                                 last_phase_computed=env.last_phase):
            raise UnprocessablePhase(phase_to_str(requested_phase),
                                     phase_to_str(env.last_phase))
        env.current_phase = requested_phase
        return requested_phase

    # Either the user wants to compute every phase regardless of previous
    # computations, or a resume was asked but no environment file was found.
    if resume_requested and not env_file_found:
        vprint(Message.ENVIRONMENT_FILE_NOT_FOUND)
    env.current_phase = Phase.PARSING
    return Phase.PARSING
示例#2
0
def _resume_phase_asked() -> bool:
    """ Check if the user asked to resume at a specific phase. Try to retrieve this information directly from the
    command-line.

    Returns:
        True if the resume-phase parameter appears in the command line, False otherwise.
    """
    # A membership test is clearer than calling `list.index()` and catching
    # `ValueError` when the index itself is never used.
    return gpn.resume_phase() in sys.argv
示例#3
0
 param_class_matrix_prefix=gpn.class_matrix_prefix(),
 param_clustering_trees_prefix=gpn.clustering_trees_prefix(),
 param_main_directory=gpn.main_directory(),
 param_subtrain_directory=gpn.subtrain_directory(),
 param_subsubtrain_directory=gpn.subsubtrain_directory(),
 param_true_class_directory=gpn.true_class_directory(),
 param_classes_matrices_directory=gpn.classes_matrices_directory(),
 param_clustering_trees_directory=gpn.clustering_trees_directory(),
 param_subsubtrain_directory_pattern=gpn.subsubtrain_directory_pattern(),
 param_discretization_threshold=gpn.discretization_threshold(),
 param_entropy_threshold=gpn.entropy_threshold(),
 param_min_size_leaf=gpn.min_size_leaf(),
 param_entropy_measure=gpn.entropy_measure(),
 param_number_of_tnorms=gpn.number_of_tnorms(),
 param_last_phase=gpn.last_phase(),
 param_resume_phase=gpn.resume_phase(),
 param_help=gpn.help_param(),
 param_identifier=gpn.identifier(),
 param_class_name=gpn.class_name(),
 param_have_header=gpn.have_header(),
 param_encoding_input=gpn.encoding_input(),
 param_encoding_output=gpn.encoding_output(),
 param_format_input=gpn.format_input(),
 param_format_output=gpn.format_output(),
 param_delimiter_input=gpn.delimiter_input(),
 param_delimiter_output=gpn.delimiter_output(),
 param_quoting_input=gpn.quoting_input(),
 param_quoting_output=gpn.quoting_output(),
 param_quote_char_input=gpn.quote_char_input(),
 param_quote_char_output=gpn.quote_char_output(),
 param_line_delimiter_input=gpn.line_delimiter_input(),
示例#4
0
def clean_args(args: dict) -> None:
    """ Clean the command-line arguments parsed by the `docopt` package.
    It mainly converts string values to their numeric and enum counterparts. If a parameter requiring an index or a
    column name has been completed with a name, it changes it to its corresponding index. It also checks if some of the
    parameters are invalid and raises exceptions accordingly.

    Args:
        args: The `docopt` dictionary of parsed arguments; mutated in place.

    Raises:
        MissingClassificationAttribute: If the classification attribute is missing or required but undefined.
        InvalidPercentage: If a threshold parameter is not a valid percentage.
        IllegalLineDelimiter: If a line delimiter is not one of the accepted values.
    """
    # Rename parameter database (docopt stores positional arguments under "<name>" keys)
    args[gpn.database()] = args["<" + gpn.database() + ">"]
    del args["<" + gpn.database() + ">"]

    # Rename parameter parent_dir
    args[gpn.parent_dir()] = args["<" + gpn.parent_dir() + ">"]
    del args["<" + gpn.parent_dir() + ">"]

    # Clean important args used by other functions
    args[gpn.quoting_input()] = str_to_quoting(args[gpn.quoting_input()])

    # Raw (string) output format, reused below as a file extension.
    extension = args[gpn.format_output()].lower()

    for param_name in args.keys():
        if param_name == gpn.parent_dir():
            if not args[param_name]:
                args[param_name] = get_absolute_path(".")
            else:
                args[param_name] = get_absolute_path(args[param_name])
        # This branch was previously a separate `if`, forcing the whole chain
        # below to be re-evaluated for the parent_dir key; parameter names are
        # distinct, so a single `elif` chain is equivalent and cheaper.
        elif param_name == gpn.class_name():
            _check_key_exists(args,
                              param_name,
                              custom_exception=MissingClassificationAttribute)
            _clean_column_index_or_name(args=args,
                                        param_name=param_name,
                                        column_name="class")
        elif param_name in (gpn.discretization_threshold(),
                            gpn.number_of_tnorms(), gpn.trees_in_forest()):
            args[param_name] = int(args[param_name])
        elif param_name in (gpn.format_input(), gpn.format_output()):
            args[param_name] = str_to_format(args[param_name])
        elif param_name == gpn.entropy_measure():
            args[param_name] = str_to_entropymeasure(args[param_name])
        elif param_name in (gpn.entropy_threshold(), gpn.quality_threshold()):
            if not is_a_percentage(args[param_name]):
                raise InvalidPercentage(args[param_name])
        elif param_name == gpn.identifier():
            if _check_default_value_id(args[param_name], gdv.identifier()):
                # We must add a column as an identifier. It will be done in the preprocessing function
                args[param_name] = None
            else:
                _clean_column_index_or_name(args=args,
                                            param_name=param_name,
                                            column_name="identifier")
        elif param_name in (gpn.initial_split_method(),
                            gpn.reference_split_method(),
                            gpn.subsubtrain_split_method()):
            args[param_name] = str_to_splittingmethod(args[param_name])
            # Keeping the class distribution requires a class attribute.
            if args[param_name] == SplittingMethod.KEEP_DISTRIBUTION and args[
                    gpn.class_name()] is None:
                raise MissingClassificationAttribute()
        elif param_name == gpn.quality_computing_method():
            args[param_name] = str_to_qualitycomputingmethod(args[param_name])
        elif param_name == gpn.clustering_trees_method():
            args[param_name] = str_to_clusteringtreesmethod(args[param_name])
        elif param_name == gpn.quoting_output():
            args[param_name] = str_to_quoting(args[param_name])
        elif param_name == gpn.main_directory():
            # Default the main directory to the database file name.
            if args[param_name] is None:
                args[param_name] = get_filename(args[gpn.database()])
        elif param_name == gpn.preprocessed_database_name():
            if args[param_name] is None:
                args[param_name] = _get_preprocessed_db_name(
                    database_name=args[gpn.database()], extension=extension)
            else:
                args[param_name] = get_filename(args[param_name],
                                                with_extension=True)
        elif param_name in (gpn.reference_name(), gpn.subtrain_name(),
                            gpn.test_name(), gpn.train_name()):
            # Force the output extension onto the derived database names.
            args[param_name] = "{}.{}".format(
                get_filename(args[param_name], with_extension=False),
                extension)
        elif param_name == gpn.header_name():
            args[param_name] = "{}.{}".format(
                get_filename(args[param_name], with_extension=False),
                args[gpn.header_extension()])
        elif param_name == gpn.verbosity():
            args[param_name] = str_to_verbosity(args[param_name])
        elif param_name in (gpn.line_delimiter_input(),
                            gpn.line_delimiter_output()):
            # Only standard newline sequences (or the escaped "\n") are legal.
            if args[param_name] not in (None, "", "\n", "\r", "\r\n"):
                if args[param_name] == "\\n":
                    args[param_name] = "\n"
                else:
                    raise IllegalLineDelimiter(args[param_name])
        elif param_name in (gpn.last_phase(), gpn.resume_phase()):
            args[param_name] = str_to_phase(args[param_name])
        # NOTE(review): a duplicate, unreachable `elif` for
        # gpn.clustering_trees_method() (already handled above) was removed.
示例#5
0
def _init_command_line_parameters(args: dict) -> None:
    """ Initialize all the command-line-parameters-related variables located inside the `env` module. """
    # Map each `env` attribute onto the `gpn` accessor that returns its
    # command-line parameter name. The last whitespace-separated token of
    # that name (e.g. the long option) is the key used in `args`.
    attribute_to_getter = {
        "cclassified_vector_prefix": gpn.cclassified_vector_prefix,
        "class_name": gpn.class_name,
        "class_matrix_prefix": gpn.class_matrix_prefix,
        "classes_matrices_directory": gpn.classes_matrices_directory,
        "clustering_trees_directory": gpn.clustering_trees_directory,
        "clustering_trees_method": gpn.clustering_trees_method,
        "clustering_trees_prefix": gpn.clustering_trees_prefix,
        "delimiter_input": gpn.delimiter_input,
        "delimiter_output": gpn.delimiter_output,
        "difficulty_vector_prefix": gpn.difficulty_vector_prefix,
        "discretization_threshold": gpn.discretization_threshold,
        "encoding_input": gpn.encoding_input,
        "encoding_output": gpn.encoding_output,
        "entropy_measure": gpn.entropy_measure,
        "entropy_threshold": gpn.entropy_threshold,
        "format_input": gpn.format_input,
        "format_output": gpn.format_output,
        "have_header": gpn.have_header,
        "header_extension": gpn.header_extension,
        "header_name": gpn.header_name,
        "identifier": gpn.identifier,
        "last_phase": gpn.last_phase,
        "initial_database_name": gpn.database,
        "initial_split_method": gpn.initial_split_method,
        "line_delimiter_input": gpn.line_delimiter_input,
        "line_delimiter_output": gpn.line_delimiter_output,
        "main_directory": gpn.main_directory,
        "minimal_size_leaf": gpn.min_size_leaf,
        "preprocessed_database_name": gpn.preprocessed_database_name,
        "quality_threshold": gpn.quality_threshold,
        "salammbo_vector_prefix": gpn.salammbo_vector_prefix,
        "quality_computing_method": gpn.quality_computing_method,
        "quality_file_prefix": gpn.quality_file_prefix,
        "quote_character_input": gpn.quote_char_input,
        "quote_character_output": gpn.quote_char_output,
        "quoting_input": gpn.quoting_input,
        "quoting_output": gpn.quoting_output,
        "reference_database_name": gpn.reference_name,
        "reference_split_method": gpn.reference_split_method,
        "reference_value": gpn.reference_value,
        "resume_phase": gpn.resume_phase,
        "statistics_file_name": gpn.statistics_file_name,
        "subsubtrain_directory": gpn.subsubtrain_directory,
        "subsubtrain_directory_pattern": gpn.subsubtrain_directory_pattern,
        "subsubtrain_name_pattern": gpn.subsubtrain_name_pattern,
        "subsubtrain_split_method": gpn.subsubtrain_split_method,
        "subtrain_directory": gpn.subtrain_directory,
        "subtrain_name": gpn.subtrain_name,
        "t_norms": gpn.number_of_tnorms,
        "test_database_name": gpn.test_name,
        "train_database_name": gpn.train_name,
        "training_value": gpn.training_value,
        "tree_file_extension": gpn.tree_file_extension,
        "trees_in_forest": gpn.trees_in_forest,
        "true_class_directory": gpn.true_class_directory,
        "vector_file_extension": gpn.vector_file_extension,
        "verbosity": gpn.verbosity,
    }
    for attribute, getter in attribute_to_getter.items():
        setattr(env, attribute, args.get(getter().split()[-1]))

    # `parent_dir` is stored under the bare parameter name (it was renamed
    # from its positional "<...>" form), hence no `split()` on the key here.
    env.parent_dir = args.get(gpn.parent_dir())