Example #1
    def threshold_determination(self, context, classifier_name,
                                patterns_outputs):
        """
        Using the discretized outputs for the ROC values, determine the best threshold values.
        """
        statistics_class = Statistics()
        #Aux structures
        threshold_list = AutoVivification()
        minimum_error = AutoVivification()

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            #Initialize the aux structures
            threshold_list[class_text] = []
            minimum_error[class_text] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "medium"] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "minimum"] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "maximum"] = float('-inf')
        #For each value of threshold generated
        for threshold in self.info[classifier_name]["roc_outputs"]:
            #Calculate the goodness of the classifier
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name]["roc_outputs"][threshold],
                patterns_outputs)
            for class_text in context["classifiers"][classifier_name][
                    "classes_names"]:
                error = 0.0
                for function in context["classifiers"][classifier_name][
                        "thresholds"]["metric"]:
                    getattr(statistics_class,
                            function)(classifier_name, context, self,
                                      "validation")
                    error += statistics_class.measures[classifier_name][
                        class_text][function]
                #When we find a new global minimum, reset the list and save the new threshold
                if error < minimum_error[class_text]:
                    minimum_error[class_text] = error
                    threshold_list[class_text] = [threshold]
                #If there is a tie in goodness, keep every threshold that reaches the minimum error
                elif error == minimum_error[class_text]:
                    threshold_list[class_text].append(threshold)
                    #Determine different kinds of thresholds

                if len(threshold_list[class_text]) == 0:
                    raise ValueError("There is no threshold selected")
        return threshold_list
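
Every example on this page relies on AutoVivification from mullpy.auxiliar. The library class itself is not shown here; a minimal sketch of the idea (a dict subclass that creates nested dictionaries on first access, in the style of the classic Python recipe) would be:

    class AutoVivification(dict):
        """Dict that creates missing nested dictionaries on access."""
        def __getitem__(self, key):
            try:
                return dict.__getitem__(self, key)
            except KeyError:
                value = self[key] = type(self)()
                return value

    info = AutoVivification()
    info["classifier_A"]["class_A"]["threshold"]["minimum"] = 0.2  # no intermediate setup needed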
Example #2
    @staticmethod
    def classes_counter_indexes(context, data_set):
        classes_counter = AutoVivification()
        classes_indexes = AutoVivification()
        classes_texts = context["classifiers"][context["classifier_list"]
                                               [0]]["classes_names"]
        len_inputs = len(data_set[0]) - len(classes_texts)

        for class_text in classes_texts:
            column = [
                data_set[i][len_inputs + classes_texts.index(class_text)]
                for i in range(len(data_set))
            ]
            classes_counter[class_text] = np.sum(column)
            classes_indexes[class_text] = column

        return classes_counter, classes_indexes
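
A small illustration of what the per-class counting above computes, with a hypothetical data set whose last columns are one-hot class labels (the array and class names are made up for this sketch):

    import numpy as np

    classes_texts = ["A", "B"]
    data_set = np.array([[0.1, 0.2, 1, 0],
                         [0.3, 0.4, 0, 1],
                         [0.5, 0.6, 1, 0]])
    len_inputs = data_set.shape[1] - len(classes_texts)
    counters = {c: data_set[:, len_inputs + i].sum() for i, c in enumerate(classes_texts)}
    print(counters)  # {'A': 2.0, 'B': 1.0}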
Example #3
def select_best_configuration_each_combination(in_file, out_file):
    # Only keep the best configuration for each combination
    f = open(in_file)
    f2 = open(out_file, "w")
    lines = f.readlines()
    resultados = AutoVivification()
    for line in lines:
        resultados[line[:line.find(":")]] = line[line.find("\t") + 1:]

    temp = []
    for classifier_name in reversed(sorted(resultados.keys(), key=lambda y: float(resultados[y]))):
        res = re.search(r'[0-9]+', classifier_name[:classifier_name.find("_")])
        nombre = classifier_name[res.start():res.end()]
        if nombre not in temp:
            f2.write(classifier_name + ":\t")
            f2.write("%.4f\n" % (float(resultados[classifier_name])))
            temp.append(nombre)
    f.close()
    f2.close()
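
The function assumes every line of in_file looks like "<classifier_name>:\t<score>" and that the digits before the first underscore in the name identify the combination. A quick check of that extraction, with a hypothetical classifier name:

    import re

    classifier_name = "MLP12_config3"  # hypothetical name following the assumed scheme
    res = re.search(r'[0-9]+', classifier_name[:classifier_name.find("_")])
    print(classifier_name[res.start():res.end()])  # -> "12"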
Example #4
def structure_combined_features():
    import itertools
    from mullpy.auxiliar import AutoVivification

    structure = AutoVivification()
    i = 0
    for amount in range(2, 5 + 1):
        temporal = list(
            itertools.combinations([
                "AGE", "EDUC", "LIMMTOTAL", "FAQ", "MMSE", "GDS", "LDELTOTAL"
            ], amount))
        for t in temporal:
            structure[i] = list(t)
            i += 1
    return structure
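
For reference, the loop enumerates every combination of 2 to 5 of the 7 listed features, so the returned structure holds C(7,2)+C(7,3)+C(7,4)+C(7,5) = 21+35+35+21 = 112 entries:

    import itertools

    features = ["AGE", "EDUC", "LIMMTOTAL", "FAQ", "MMSE", "GDS", "LDELTOTAL"]
    total = sum(1 for amount in range(2, 5 + 1)
                for _ in itertools.combinations(features, amount))
    print(total)  # 112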
Example #5
    def __init__(self, context, ensemble_name, information, pattern_kind_list):
        """
        Complete the Information class with the ensemble decisions.
        self.info, an AutoVivification instance, should contain only the ensemble's internal information.
        Build the real and discretized outputs of the ensemble, depending on the ensemble kind.
        """
        self.info = AutoVivification()
        self.weights = None
        self.determine_ensemble_threshold(context, ensemble_name)

        for pattern_kind in pattern_kind_list:
            self._init_decision_matrix(context, ensemble_name, pattern_kind)
            self._build_decision_matrix(context, ensemble_name, information,
                                        pattern_kind)
            if nested_dict_access(
                ["classifiers", ensemble_name, "meta_learner"], context):
                self.meta_learner(context, ensemble_name, information)
            else:
                self._schedule_decisions(context, ensemble_name, information,
                                         pattern_kind)
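
nested_dict_access from mullpy.auxiliar is not shown on this page; judging from the call above, it walks a key path inside a nested dict and returns the value found, or a false-y result when a key is missing. A rough sketch of that idea (not the library code):

    def nested_dict_access(key_path, dictionary):
        # Walk key_path inside dictionary; return the nested value, or None if any key is missing.
        current = dictionary
        for key in key_path:
            if not isinstance(current, dict) or key not in current:
                return None
            current = current[key]
        return current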
Example #6
    def random_distribution(self, context):
        """
        Bagging methods come in many flavours, but they mostly differ from each other in the way they draw random
        subsets of the training set:

        -When random subsets of the dataset are drawn as random subsets of the samples, the algorithm is known
        as Pasting Rvotes.
        -When samples are drawn with replacement, the method is known as Bagging.
        -When random subsets of the dataset are drawn as random subsets of the features, the method is known as
        Random Subspaces.
        -When base estimators are built on subsets of both samples and features, the method is known as Random
        Patches.

        The group_successive variable groups every X instances; each group of successive instances has to stay
        together during the sampling process.
        """
        total_length = 0
        lengths = AutoVivification()
        for pattern_kind in context["patterns"].patterns[
                context["classifier_list"][0]]:
            lengths[pattern_kind] = len(context["patterns"].patterns[
                context["classifier_list"][0]][pattern_kind])
            total_length += lengths[pattern_kind]

        #Check that all the patterns have the same length
        for classifier_name in context["classifier_list"]:
            for pattern_kind in context["patterns"].patterns[classifier_name]:
                if len(context["patterns"].patterns[classifier_name]
                       [pattern_kind]) != lengths[pattern_kind]:
                    raise ValueError(
                        'The length of the %s pattern of classifier %s differs from the others'
                        % (pattern_kind, classifier_name))

        if context["preprocess"]["random_distribution"]["group_successive"]:
            total_length = int(total_length / context["preprocess"]
                               ["random_distribution"]["group_successive"])
            for pattern_kind in lengths:
                lengths[pattern_kind] = int(
                    lengths[pattern_kind] / context["preprocess"]
                    ["random_distribution"]["group_successive"])

        dir_name = context["general_path"] + "patterns/" + context[
            "classifiers"][context["classifier_list"][0]]["set"]
        filters = AutoVivification()
        ###Specific kind of sampling###
        #############
        ######BAGGING
        #############
        if "bagging" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["bagging"]["activate"]:
            for pattern_kind in context["patterns_texts"]:
                filters[pattern_kind] = []
            self.bagging(context, filters, lengths, total_length)
            dir_name += "_bagging/"
        #############
        ######PASTING
        #############
        elif "pasting_Rvotes" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["pasting_Rvotes"]["activate"]:
            for pattern_kind in context["patterns_texts"]:
                filters[pattern_kind] = []
            self.pasting_rvotes(context, filters, lengths, total_length)
            dir_name += "_pasting_Rvotes/"
        #################
        #RANDOM SUBSPACES
        #################
        elif "random_subspaces" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["random_subspaces"]["activate"]:
            features_amount = self.check_features_amount(context)
            for pattern_kind in context["patterns_texts"]:
                filters[pattern_kind] = []
            self.random_subspaces(context, filters, features_amount)
            dir_name += "_random_subspaces/"
        #############
        #COMBINATIONS
        #############
        elif "all_features_combination" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["all_features_combination"]["activate"]:
            features_amount = self.check_features_amount(context)
            for pattern_kind in context["patterns_texts"]:
                filters[pattern_kind] = []
            self.all_features_combination(context, filters, features_amount)
            dir_name += "_features_combination/"
            context["preprocess"]["random_distribution"][
                "number_base_classifiers"] = len(filters["learning"])
        ###############
        #RANDOM PATCHES
        ###############
        elif "random_patches" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["random_patches"]["activate"]:
            dir_name += "_random_patches/"
        ###############
        #K-FOLD
        ###############
        elif "k_fold" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["k_fold"]["activate"]:
            for pattern_kind in context["preprocess"]["random_distribution"][
                    "k_fold"]["percents"]:
                filters[pattern_kind] = []
            self.k_fold(context, filters)
            dir_name += "_k_fold/"
        ###############
        #Forecasting distribution
        ###############
        elif "forecasting_distribution" in context["preprocess"]["random_distribution"] and \
                context["preprocess"]["random_distribution"]["forecasting_distribution"]["activate"]:
            self.forecasting_distribution(context, filters)
            dir_name += "_walking_forward/"

        ###Common functions###
        if ("bagging" in context["preprocess"]["random_distribution"] and
                context["preprocess"]["random_distribution"]["bagging"]["activate"]) or \
                ("pasting_Rvotes" in context["preprocess"]["random_distribution"] and
                 context["preprocess"]["random_distribution"]["pasting_Rvotes"]["activate"]):
            if context["preprocess"]["random_distribution"][
                    "group_successive"]:
                for kind_of in filters:
                    for filter in filters[kind_of]:
                        for i in range(len(filter)):
                            filter[i] = (
                                filter[i] * context["preprocess"]
                                ["random_distribution"]["group_successive"])
                            for j in range(
                                    1, context["preprocess"]
                                ["random_distribution"]["group_successive"]):
                                filter.append(filter[i] + j)

        path_exists(dir_name)

        self._generate_new_patterns_random_distribution(
            context, filters, dir_name)
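
The four sampling flavours described in the docstring can be sketched with plain NumPy on a hypothetical data matrix; this only illustrates the idea, it is not the filter structure the method above builds:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 8))                                    # hypothetical (instances x features) matrix

    pasting_rows = rng.choice(len(X), size=50, replace=False)        # Pasting Rvotes: subsets of the samples
    bagging_rows = rng.choice(len(X), size=len(X), replace=True)     # Bagging: samples drawn with replacement
    subspace_cols = rng.choice(X.shape[1], size=4, replace=False)    # Random Subspaces: subsets of the features
    random_patch = X[np.ix_(bagging_rows, subspace_cols)]            # Random Patches: both samples and features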
Example #7
    def k_fold(self, context, filters):
        classes_texts = context["classifiers"][context["classifier_list"]
                                               [0]]["classes_names"]
        num_instances = sum([
            len(context["patterns"].patterns[context["classifier_list"][0]][x])
            for x in context["patterns"].patterns[context["classifier_list"]
                                                  [0]]
        ])

        data_set = None
        for i, filter_name in enumerate(context["patterns"].patterns[
                context["classifier_list"][0]].keys()):
            if i == 0:
                data_set = context["patterns"].patterns[
                    context["classifier_list"][0]][filter_name]
            else:
                data_set = np.concatenate(
                    (data_set, context["patterns"].patterns[
                        context["classifier_list"][0]][filter_name]))

        total_classes_counter, classes_indexes = self.classes_counter_indexes(
            context, data_set)
        classes_counter = AutoVivification()
        min_limit_classes = np.min([
            total_classes_counter[class_counter]
            for class_counter in total_classes_counter
        ])

        for i in range(context["preprocess"]["random_distribution"]
                       ["number_base_classifiers"]):
            total_indexes = []
            for j, filter_name in enumerate(["learning", "validation"]):
                aux_list = []
                aux_percent = context["preprocess"]["random_distribution"][
                    "k_fold"]["percents"][filter_name]
                if j == len(context["preprocess"]["random_distribution"]
                            ["k_fold"]["percents"]) - 1:
                    filters[filter_name].append([
                        x for x in range(len(data_set))
                        if x not in total_indexes
                    ])
                    break
                else:
                    if context["preprocess"]["random_distribution"]["k_fold"][
                            "balanced"]:
                        total_instances = 0
                        for class_text in context["classifiers"][context[
                                "classifier_list"][0]]["classes_names"]:
                            classes_counter[filter_name][class_text] = np.ceil(
                                aux_percent * min_limit_classes)
                            total_instances += classes_counter[filter_name][
                                class_text]
                    else:
                        total_instances = np.ceil(aux_percent * num_instances)

                len_inputs = len(data_set[0]) - len(classes_texts)
                while len(aux_list) != total_instances:
                    value = np.random.randint(0, len(data_set))
                    if value not in total_indexes:
                        if context["preprocess"]["random_distribution"][
                                "k_fold"]["balanced"]:
                            if classes_counter[filter_name][classes_texts[list(
                                    data_set[value][len_inputs:]).index(
                                        1)]] > 0:
                                total_indexes.append(value)
                                aux_list.append(value)
                                classes_counter[filter_name][classes_texts[
                                    list(data_set[value][len_inputs:]).index(
                                        1)]] -= 1
                        else:
                            total_indexes.append(value)
                            aux_list.append(value)

                filters[filter_name].append(aux_list)
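
When the "balanced" flag is set, each fold caps every class at np.ceil(percent * size_of_smallest_class), so the folds end up with roughly the same number of instances per class. A worked example with hypothetical counts:

    import numpy as np

    total_classes_counter = {"A": 120, "B": 40}          # hypothetical class frequencies
    min_limit_classes = min(total_classes_counter.values())
    learning_percent = 0.75
    per_class_quota = np.ceil(learning_percent * min_limit_classes)
    print(per_class_quota)  # 30.0 instances of each class go into the "learning" fold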
Example #8
    def classes_error(self, context, classifier_name):

        self.info[classifier_name]["selection_errors"] = []

        statistics_class = Statistics()
        values = AutoVivification()
        pattern_kind = context["pattern_kind"]
        outputs_kind = context["outputs_kind"]

        if classifier_name in context["classifier_list"]:
            temporal_patterns = copy.deepcopy(
                context["patterns"].patterns[classifier_name][pattern_kind])
        else:
            original = self.info[classifier_name][outputs_kind][pattern_kind]
            original_pattern_ref = context["patterns"].patterns[
                classifier_name][pattern_kind]

        for i in range(
                1,
                len(context["classifiers"][classifier_name]["classes_names"])):
            temp = [1] * i
            temp.extend([-1] * (len(
                context["classifiers"][classifier_name]["classes_names"]) - i))
            values[i] = [temp]
            for new in permutations(values[i][0]):
                if new not in values[i]:
                    values[i].append(new)

            if classifier_name in context["classifier_list"]:
                context["patterns"].modify_patterns_temporally(
                    classifier_name, pattern_kind,
                    context["patterns"].filter_classes(classifier_name,
                                                       pattern_kind,
                                                       values[i]))
                self.build_real_outputs(context, classifier_name, pattern_kind)
                self.discretize_outputs(context, classifier_name, pattern_kind)
                ref_patterns = context["patterns"].patterns[classifier_name][
                    pattern_kind]
            else:
                positions = [
                    position
                    for position, instance in enumerate(original_pattern_ref)
                    if instance[1] in values[i]
                ]
                self.info[classifier_name][outputs_kind][pattern_kind] = \
                    [original[i] for i in range(len(original)) if i in positions]
                ref_patterns = [
                    original_pattern_ref[i]
                    for i in range(len(original_pattern_ref)) if i in positions
                ]

            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name][outputs_kind][pattern_kind],
                ref_patterns)
            self.info[classifier_name]["selection_errors"].append(
                statistics_class.measures[classifier_name]['E'])

            if classifier_name in context["classifier_list"]:
                #Restore the original patterns
                context["patterns"].modify_patterns_temporally(
                    classifier_name, pattern_kind, temporal_patterns)
                self.build_real_outputs(context, classifier_name, pattern_kind)
                self.discretize_outputs(context, classifier_name, pattern_kind)
            else:
                self.info[classifier_name][outputs_kind][
                    pattern_kind] = original
                from mullpy.ensembles import Ensemble

                Ensemble(context, classifier_name, self, [pattern_kind])
Example #9
    def __init__(self):
        """
        Internal structure as AutoVivification class
        """
        self.info = AutoVivification()
Example #10
    def __init__(self, context):
        self.patterns = AutoVivification()
        for classifier_name in context["classifier_list"]:
            for pattern_kind in context["patterns_texts"]:
                self.patterns[classifier_name][pattern_kind] = None
Example #11
    def __init__(self):
        """
        Initialize the internal structure as AutoVivification class
        """
        self.measures = AutoVivification()
Example #12
class Statistics:
    """
    The class where all statistics functions are defined, such as goodness, standard deviation or mean squared error.
    All the information relative to the classifiers is saved in the class structure, indexed by classifier name.
    """

    def __init__(self):
        """
        Initialize the internal structure as AutoVivification class
        """
        self.measures = AutoVivification()

    #####################################################
    @staticmethod
    def change_ranges(value, **kwargs):
        """
        Project a given value from the old range onto the new range.
        """
        if len(kwargs.keys()) != 4:
            raise ValueError("change_ranges needs 4 keyword parameters: oldMin, oldMax, newMin, newMax")

        old_min = kwargs["oldMin"]
        old_max = kwargs["oldMax"]
        new_max = kwargs["newMax"]
        new_min = kwargs["newMin"]

        old_range = old_max - old_min
        new_range = new_max - new_min
        old_value = value

        return (((old_value - old_min) * new_range) / old_range) + new_min
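
    # A worked example of the projection above (hypothetical values): mapping 0.5 from the
    # range [-1, 1] onto [0, 1] gives ((0.5 - (-1)) * 1) / 2 + 0 = 0.75, i.e.
    # Statistics.change_ranges(0.5, oldMin=-1, oldMax=1, newMin=0, newMax=1) == 0.75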

    #############################################
    def rms(self, classifier_name, context, information, pattern_kind):
        """
        Calculate the RMS error of the classifier for the given pattern kind.
        """
        list_outputs_classifier = information.info[classifier_name]["continuous_outputs"][pattern_kind]
        self.measures[classifier_name]["rms"][pattern_kind] = 0.0
        pattern = copy.deepcopy(context["patterns"].patterns[classifier_name][pattern_kind])
        #Difference between desired outputs(patterns) and the real outputs
        classes_texts = context["classifiers"][classifier_name]["classes_names"]
        len_inputs = len(pattern[0]) - len(classes_texts)
        for outputs, desired in zip(list_outputs_classifier, pattern):
            if context["classifiers"][classifier_name]["patterns"]["range"] != [0, 1]:
                for i, desire in enumerate(desired[len_inputs:]):
                    desired[len_inputs:][i] = \
                        self.change_ranges(
                            desire,
                            oldMin=context["classifiers"][classifier_name]["patterns"]["range"][0],
                            oldMax=context["classifiers"][classifier_name]["patterns"]["range"][1],
                            newMin=0,
                            newMax=1)

            self.measures[classifier_name]["rms"][pattern_kind] += sum(0.5 * (desired[len_inputs:] - outputs) ** 2)
        self.measures[classifier_name]["rms"][pattern_kind] /= float(len(pattern))

    #############################################

    @staticmethod
    def discretize_outputs(value):
        """
        Used like a lambda function
        """
        if value == -1:
            return 0.
        return value

    #############################################

    def initialize_goodness(self, context, classifier_name, instances_number, classes_names):
        #Initialize the structure of goodness values.
        for values_kind in ['fp', 'fn', 'tp', 'tn']:
            self.measures[classifier_name]["matrix"][values_kind] = \
                np.zeros([instances_number, len(classes_names)], dtype=np.float16)

            self.measures[classifier_name][values_kind] = 0.0
            for class_text in classes_names:
                self.measures[classifier_name][class_text][values_kind] = 0.0

    #############################################

    def build_list_oracle_outputs(self, classifier_name):
        self.measures[classifier_name]["matrix"]["oracle_outputs"] = \
            self.measures[classifier_name]["matrix"]["tp"] + self.measures[classifier_name]["matrix"]["tn"]

    #############################################

    def goodness(self, context, classifier_name, list_outputs_classifier, pattern_outputs):
        """
        Calculate the goodness of the classifier. It contains an error formula that penalizes instances
        with a single class more, and instances where several classes are present less.
        It is a generalization for the multiclass problem.
        The goodness is computed in terms of FP, FN, TP, TN and different kinds of error, such as the global error,
        the false positive error and the false negative error.
        """
        #TODO:Change the input parameters from list outputs and patterns to Information
        if not len(pattern_outputs):
            raise NameError('Statistics did not get the patterns of the classifier %s correctly at dir %s' %
                            (classifier_name, context["classifiers"][classifier_name]["paths"]["patterns"]))
        if not len(list_outputs_classifier):
            raise NameError('Statistics did not get the outputs of the classifier %s correctly' % classifier_name)
        if len(list_outputs_classifier) != len(pattern_outputs):
            raise NameError('Different lengths in patterns and outputs on classifier %s' % classifier_name)

        #############################################
        #To improve code readability
        classes_names = context["classifiers"][classifier_name]["classes_names"]
        instances_number = float(len(pattern_outputs))
        len_inputs = len(pattern_outputs[0]) - len(classes_names)

        self.initialize_goodness(context, classifier_name, int(instances_number), classes_names)

        #############################################
        #Measure the error by instance
        for instance in range(int(instances_number)):
            #Number of classes present in an instance. For multilabel problems
            for output_index, class_text in enumerate(classes_names):
                output_wanted = pattern_outputs[instance][len_inputs:][output_index]
                output = list_outputs_classifier[instance][output_index]

                if output == (-1.):
                    output = 0.
                if output_wanted == (-1.):
                    output_wanted = 0.

                #If there is an error
                if output_wanted != output:
                    #If the wanted output was activated, this is a FN
                    if output_wanted == 1.0:
                        #FN
                        self.measures[classifier_name]["matrix"]['fn'][instance][output_index] = 1.
                    else:
                        # If the wanted output was not activated, this is a FP
                        self.measures[classifier_name]["matrix"]['fp'][instance][output_index] = 1.
                #No error
                else:
                    #TP
                    if output_wanted == 1.0:
                        self.measures[classifier_name]["matrix"]['tp'][instance][output_index] = 1.
                    #TN
                    else:
                        self.measures[classifier_name]["matrix"]['tn'][instance][output_index] = 1.
        #############################################
        #The goodness values in terms of sum of the instances
        for good in self.measures[classifier_name]["matrix"].keys():
            self.measures[classifier_name][good] = np.sum(self.measures[classifier_name]["matrix"][good])
            for output_index, class_text in enumerate(classes_names):
                self.measures[classifier_name][class_text][good] = \
                    np.sum(self.measures[classifier_name]["matrix"][good], 0)[output_index]
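
    # After goodness() runs, measures[classifier_name]["matrix"]["tp"|"tn"|"fp"|"fn"] are
    # (instances x classes) 0/1 arrays, measures[classifier_name]["tp"] etc. hold their grand
    # totals, and measures[classifier_name][class_text]["tp"] etc. hold the per-class column sums.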

    #########################################################################################

    def error_fn(self, classifier_name, context, information, pattern_kind):
        pattern_outputs = context["patterns"].patterns[classifier_name][pattern_kind]
        classes_names = context["classifiers"][classifier_name]["classes_names"]

        self.measures[classifier_name]["error_fn"] = 0.0
        for class_text in classes_names:
            self.measures[classifier_name][class_text]["error_fn"] = 0.0

        for output_index, class_text in enumerate(classes_names):
            num_instances_of_the_class = np.sum([self.measures[classifier_name]["matrix"]['tp'][i][output_index] +
                                                 self.measures[classifier_name]["matrix"]['fn'][i][output_index]
                                                 for i in range(len(pattern_outputs))])

            #The error depends on the number of instances of its class and on the total number of classes
            if len(classes_names) == 1:
                self.measures[classifier_name][class_text]["error_fn"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["fn"], 0)[output_index] / \
                    num_instances_of_the_class
                self.measures[classifier_name][class_text]["error_fn"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["fn"], 0)[output_index] / \
                    (float(len(pattern_outputs)) - num_instances_of_the_class)
            else:
                self.measures[classifier_name][class_text]["error_fn"] = \
                    (np.sum(self.measures[classifier_name]["matrix"]["fn"], 0)[output_index] /
                     num_instances_of_the_class) / len(classes_names)

        for class_text in classes_names:
            self.measures[classifier_name]["error_fn"] += self.measures[classifier_name][class_text]["error_fn"]

    #########################################################################################

    def error_fp(self, classifier_name, context, information, pattern_kind):
        pattern_outputs = context["patterns"].patterns[classifier_name][pattern_kind]
        classes_names = context["classifiers"][classifier_name]["classes_names"]

        self.measures[classifier_name]["error_fp"] = 0.0
        for class_text in classes_names:
            self.measures[classifier_name][class_text]["error_fp"] = 0.0

        for output_index, class_text in enumerate(classes_names):
            num_instances_of_the_class = np.sum([self.measures[classifier_name]["matrix"]['tp'][i][output_index] +
                                                 self.measures[classifier_name]["matrix"]['fn'][i][output_index]
                                                 for i in range(len(pattern_outputs))])

            #The error depends on the number of instances of its class and on the total number of classes
            if len(classes_names) == 1:
                self.measures[classifier_name][class_text]["error_fp"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["fp"], 0)[output_index] / \
                    num_instances_of_the_class
                self.measures[classifier_name][class_text]["error_fp"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["fp"], 0)[output_index] / \
                    (float(len(pattern_outputs)) - num_instances_of_the_class)
            else:
                self.measures[classifier_name][class_text]["error_fp"] = \
                    (np.sum(self.measures[classifier_name]["matrix"]["fp"], 0)[output_index] /
                     num_instances_of_the_class) / len(classes_names)

        for class_text in classes_names:
            self.measures[classifier_name]["error_fp"] += self.measures[classifier_name][class_text]["error_fp"]

    #########################################################################################

    def error(self, classifier_name, context, information, pattern_kind):
        """
        Calculate the errors of the classifier given by name.
        This error compensates the minority class by dividing each error class by the number of instances
        of that class, and finally divided by the number of classes.
        """
        self.error_fp(classifier_name, context, information, pattern_kind)
        self.error_fn(classifier_name, context, information, pattern_kind)

        for class_text in context["classifiers"][classifier_name]["classes_names"]:
            self.measures[classifier_name][class_text]["error"] = \
                self.measures[classifier_name][class_text]["error_fp"] + \
                self.measures[classifier_name][class_text]["error_fn"]

        self.measures[classifier_name]["error"] = \
            self.measures[classifier_name]["error_fp"] + self.measures[classifier_name]["error_fn"]
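
    # In the multiclass branch of the two helpers above, for each class c:
    #   error_fn[c] = (#FN of c / #instances of c) / #classes
    #   error_fp[c] = (#FP of c / #instances of c) / #classes
    # and the reported "error" sums error_fp[c] + error_fn[c] over the classes, so every class
    # weighs the same regardless of how many instances it has.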

    #########################################################################################

    def balanced_accuracy(self, classifier_name, context, *args):
        self.tnr(classifier_name, context)
        self.tpr(classifier_name, context)

        for class_text in context["classifiers"][classifier_name]["classes_names"]:
            self.measures[classifier_name][class_text]["balanced_accuracy"] = \
                (self.measures[classifier_name][class_text]["tpr"] +
                 self.measures[classifier_name][class_text]["tnr"]) / 2.

        self.measures[classifier_name]["balanced_accuracy"] = \
            np.mean([self.measures[classifier_name][x]["balanced_accuracy"] for x in
                     context["classifiers"][classifier_name]["classes_names"]])

    #########################################################################################

    def g_means(self, classifier_name, context, *args):
        """
        Geometric mean as the sqrt of the sensibility*specificity
        """
        self.tnr(classifier_name, context)
        self.tpr(classifier_name, context)

        self.measures[classifier_name]["g_means"] = np.sqrt(np.dot(self.measures[classifier_name]["tnr"],
                                                                   self.measures[classifier_name]["tpr"]))

        for class_text in context["classifiers"][classifier_name]["classes_names"]:
            self.measures[classifier_name][class_text]["g_means"] = \
                np.sqrt(np.dot(self.measures[classifier_name][class_text]["tnr"],
                               self.measures[classifier_name][class_text]["tpr"]))

    #########################################################################################

    def tnr(self, classifier_name, context, *args):
        """
        True Negative Rate
        """
        fp = self.measures[classifier_name]["fp"]
        tn = self.measures[classifier_name]["tn"]

        if tn + fp > 0:
            self.measures[classifier_name]["tnr"] = np.divide(tn, tn + fp)
        else:
            self.measures[classifier_name]["tnr"] = 0.0

        for class_text in context["classifiers"][classifier_name]["classes_names"]:
            fp = self.measures[classifier_name][class_text]["fp"]
            tn = self.measures[classifier_name][class_text]["tn"]

            if tn + fp > 0:
                self.measures[classifier_name][class_text]["tnr"] = np.divide(tn, tn + fp)
            else:
                self.measures[classifier_name][class_text]["tnr"] = 0.0

    #########################################################################################

    def tpr(self, classifier_name, context, *args):
        """
        True Positive Rate
        """
        tp = self.measures[classifier_name]["tp"]
        fn = self.measures[classifier_name]["fn"]

        if tp + fn > 0:
            self.measures[classifier_name]["tpr"] = np.divide(tp, tp + fn)
        else:
            self.measures[classifier_name]["tpr"] = 0.0

        for class_text in context["classifiers"][classifier_name]["classes_names"]:
            tp = self.measures[classifier_name][class_text]["tp"]
            fn = self.measures[classifier_name][class_text]["fn"]

            if tp + fn > 0:
                self.measures[classifier_name][class_text]["tpr"] = np.divide(tp, tp + fn)
            else:
                self.measures[classifier_name][class_text]["tpr"] = 0.0

    #########################################################################################
    @staticmethod
    def get_ytrue_ypred(context, information, classifier_name, pattern_kind):
        len_classes = len(context["classifiers"][context["classifier_list"][0]]["classes_names"])
        len_inputs = len(context["patterns"].patterns[classifier_name][pattern_kind][0]) - len_classes
        y_true = list(context["patterns"].patterns[classifier_name][pattern_kind][:, range(len_inputs,
                                                                                           len_inputs +
                                                                                           len_classes)])
        y_pred = information.info[classifier_name]["continuous_outputs"][pattern_kind]
        return y_true, y_pred

    #########################################################################################

    def explained_variance_score(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import explained_variance_score

        y_true, y_pred = self.get_ytrue_ypred(context, information, classifier_name, pattern_kind)
        self.measures[classifier_name]["explained_variance_score"] = \
            explained_variance_score(y_true, y_pred)

    #########################################################################################

    def mean_absolute_error(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import mean_absolute_error

        y_true, y_pred = self.get_ytrue_ypred(context, information, classifier_name, pattern_kind)
        self.measures[classifier_name]["mean_absolute_error"] = \
            mean_absolute_error(y_true, y_pred)

    #########################################################################################

    def mean_squared_error(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import mean_squared_error

        y_true, y_pred = self.get_ytrue_ypred(context, information, classifier_name, pattern_kind)
        self.measures[classifier_name]["mean_squared_error"] = \
            mean_squared_error(y_true, y_pred)

    #########################################################################################

    def r2_score(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import r2_score

        y_true, y_pred = self.get_ytrue_ypred(context, information, classifier_name, pattern_kind)
        self.measures[classifier_name]["r2_score"] = \
            r2_score(y_true, y_pred)

    #########################################################################################
    @staticmethod
    def confusion_matrix(classifier_name, context, information, pattern_kind):
        from sklearn.metrics import confusion_matrix

        confusion_matrix(context["patterns"].patterns[classifier_name][pattern_kind],
                         information.info[classifier_name]["discretized_outputs"][pattern_kind],
                         context["classifiers"][classifier_name]["classes_names"])

    #########################################################################################
    @staticmethod
    def matthews_corrcoef(classifier_name, context, information, pattern_kind):
        from sklearn.metrics import matthews_corrcoef

        matthews_corrcoef(context["patterns"].patterns[classifier_name][pattern_kind],
                          information.info[classifier_name]["discretized_outputs"][pattern_kind])

    #########################################################################################

    def hamming_loss(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import hamming_loss

        self.measures[classifier_name]["hamming_loss"] = \
            hamming_loss(
                context["patterns"].patterns[classifier_name][pattern_kind],
                information.info[classifier_name]["discretized_outputs"][pattern_kind])

    #########################################################################################

    def kappa(self, classifier_name, *args):
        self.measures[classifier_name]["kappa"] = \
            self.measures[classifier_name]["matrix"]['tp'] + self.measures[classifier_name]["matrix"]['tn']

    #########################################################################################

    def f_measure(self, classifier_name, *args):
        self.recall(classifier_name, *args)
        self.accuracy(classifier_name, *args)

        self.measures[classifier_name]["f_measure"] = \
            (2 * self.measures[classifier_name]["recall"] * self.measures[classifier_name]["accuracy"]) / \
            (self.measures[classifier_name]["recall"] + self.measures[classifier_name]["accuracy"])

    #########################################################################################

    def accuracy(self, classifier_name, *args):
        self.measures[classifier_name]["accuracy"] = \
            self.measures[classifier_name]['tp'] / (
                self.measures[classifier_name]['tp'] + self.measures[classifier_name]['fp'])

    #########################################################################################

    def error_rate(self, classifier_name, *args):
        total = np.sum([self.measures[classifier_name]["matrix"][goodness]
                        for goodness in self.measures[classifier_name]["matrix"].keys()])
        self.measures[classifier_name]["error_rate"] = \
            (self.measures[classifier_name]["matrix"]['tp'] +
             self.measures[classifier_name]["matrix"]['tn']) / total

    #########################################################################################

    def accuracy_rate(self, classifier_name, *args):
        total = np.sum([self.measures[classifier_name]["matrix"][goodness]
                        for goodness in self.measures[classifier_name]["matrix"].keys()])
        self.measures[classifier_name]["accuracy_rate"] = \
            (self.measures[classifier_name]["matrix"]['fp'] +
             self.measures[classifier_name]["matrix"]['fn']) / total

    #########################################################################################

    def recall(self, classifier_name, *args):
        self.tpr(classifier_name, *args)
        self.measures[classifier_name]["recall"] = self.measures[classifier_name]["tpr"]

    #########################################################################################

    def fn_rate(self, classifier_name, *args):
        self.measures[classifier_name]["fn_rate"] = self.measures[classifier_name]["matrix"]['fn'] / (
            self.measures[classifier_name]["matrix"]['tp'] + self.measures[classifier_name]["matrix"]['fn'])

    #########################################################################################

    def fp_rate(self, classifier_name, *args):
        self.measures[classifier_name]["fp_rate"] = self.measures[classifier_name]["matrix"]['fp'] / (
            self.measures[classifier_name]["matrix"]['tn'] + self.measures[classifier_name]["matrix"]['fp'])

    #########################################################################################

    def auc(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import roc_auc_score

        classes_names = context["classifiers"][classifier_name]["classes_names"]
        inputs = len(context["patterns"].patterns[classifier_name][pattern_kind][0]) - len(classes_names)
        self.measures[classifier_name]["auc"] = 0.0
        for i, class_name in enumerate(classes_names):
            self.measures[classifier_name][class_name]["auc"] = \
                roc_auc_score(context["patterns"].patterns[classifier_name][pattern_kind][:, inputs + i],
                              information.info[classifier_name]["continuous_outputs"][pattern_kind][:, i])
            self.measures[classifier_name]["auc"] += self.measures[classifier_name][class_name]["auc"]

        self.measures[classifier_name]["auc"] = np.divide(np.mean(self.measures[classifier_name]["auc"]),
                                                          len(classes_names))

    #########################################################################################

    def std(self, classifier_name, context, *args):
        """
        Calculate the standard deviation of the given classifier, for each kind of error.
        Thus, there is a std for the false positive error, another for the false negative error, etc.
        """

        self.measures[classifier_name]['dt_efp'] = np.std(self.measures[classifier_name]["matrix"]['efp'])
        self.measures[classifier_name]['dt_efn'] = np.std(self.measures[classifier_name]["matrix"]['efn'])
        self.measures[classifier_name]['dt_e'] = np.std(self.measures[classifier_name]["matrix"]['efp'] +
                                                        self.measures[classifier_name]["matrix"]['efn'])

        for output_index, class_text in enumerate(context["classifiers"][classifier_name]["classes_names"]):
            self.measures[classifier_name][class_text]['dt_efp'] = \
                np.std(self.measures[classifier_name]["matrix"]["efp"], 0)[output_index]
            self.measures[classifier_name][class_text]['dt_efn'] = \
                np.std(self.measures[classifier_name]["matrix"]["efn"], 0)[output_index]
            self.measures[classifier_name][class_text]['dt_e'] = \
                np.std(self.measures[classifier_name]["matrix"]["e"], 0)[output_index]

    #############################################
    @staticmethod
    def __build_multiple_name(sub_list):
        name = ""
        if type(sub_list) != list:
            for x_tuple in sub_list:
                name = "+".join([x for x in x_tuple])
        else:
            for i, name_i in enumerate(sub_list):
                if i == len(sub_list) - 1:
                    name += name_i
                else:
                    name += name_i + "+"
        return name

    #############################################

    def correctly_classified(self, sub_list):
        correctly_classified = np.zeros(len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]))
        for i in range(len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"])):
            for j, classifier_name in enumerate(sub_list):
                if (np.array(self.measures[classifier_name]["matrix"]["oracle_outputs"][i]) == np.ones(
                        len(self.measures[classifier_name]["matrix"]["oracle_outputs"][i]))).all():
                    correctly_classified[i] += 1

        return correctly_classified
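
    # correctly_classified[i] counts how many classifiers in sub_list get instance i entirely
    # right (their whole oracle-output row equals one); the non-pairwise diversity measures
    # below are computed from this vector.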

    #############################################

    def interrater_agreement_k_non_pairwise(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        p = np.sum([self.measures[x]['E'] for x in self.measures if 'E' in self.measures[x]]) / \
            (len(sub_list) * len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]))

        for i in range(len(correctly_classified)):
            error += correctly_classified[i] * (len(sub_list) - correctly_classified[i])

        if p == 0.0:
            p = np.exp(100)

        error /= len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]) * (len(sub_list) - 1) * p * (1 - p)
        return 1 - error

    #############################################

    def difficulty(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        mean_errors = np.mean(correctly_classified)
        for i in range(len(correctly_classified)):
            error += np.power((correctly_classified[i] - (correctly_classified[i] / mean_errors)), 2)
        error /= (len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]) * np.power(len(sub_list), 2))
        return 1. - error

    #############################################

    def kohavi_wolpert(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        for i in range(len(correctly_classified)):
            error += correctly_classified[i] * (len(sub_list) - correctly_classified[i])
        error /= len(sub_list)

        error /= (len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]) * np.power(len(sub_list), 2))
        return error

    #############################################

    def entropy(self, context, sub_list):
        Error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        for i in range(len(correctly_classified)):
            Error += (min(correctly_classified[i], len(sub_list) - correctly_classified[i])
                      /
                      (len(sub_list) - np.ceil(len(sub_list) / 2.)))

        Error /= len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"])
        return Error

    #############################################

    def diversity_non_pairwise_structure(self, context, function, classifier_list):
        for i, classifier_name in enumerate(classifier_list):
            if context["interactive"]["activate"]:
                sys.stdout.write("\r{0}>".format("Completed:%f%%" % ((float(i) / len(classifier_list)) * 100)))
                sys.stdout.flush()

            # name = self.__build_multiple_name(sub_list)

            self.measures[classifier_name][function] = \
                getattr(self, function)(context, context["classifiers"][classifier_name]["classifiers"])

    #############################################

    def diversity_pairwise_structure(self, context, function, classifier_list):
        for i, classifier_1 in enumerate(classifier_list):
            if context["interactive"]["activate"]:
                sys.stdout.write("\r{0}>".format("Completed:%f%%" % ((float(i) / len(classifier_list)) * 100)))
                sys.stdout.flush()
            for classifier_2 in context["classifiers"].keys():

                if "pairwise_diversity" in self.measures[classifier_2].keys() and function in \
                        self.measures[classifier_2][
                            "pairwise_diversity"].keys() and classifier_1 in \
                        self.measures[classifier_2]["pairwise_diversity"][
                            function].keys():

                    self.measures[classifier_1]["pairwise_diversity"][function][classifier_2] = \
                        self.measures[classifier_2]["pairwise_diversity"][function][classifier_1]

                else:
                    self.measures[classifier_1]["pairwise_diversity"][function][classifier_2] = \
                        getattr(self, function)(classifier_1, classifier_2, context)

            vector = [self.measures[classifier_1]["pairwise_diversity"][function][x] for x in
                      self.measures[classifier_1]["pairwise_diversity"][function].keys() if x != classifier_1]
            self.measures[classifier_1]["pairwise_diversity"][function]["mean"] = np.mean(vector)
            self.measures[classifier_1]["pairwise_diversity"][function]["median"] = np.median(vector)
            self.measures[classifier_1]["pairwise_diversity"][function]["std"] = np.std(vector)
            self.measures[classifier_1]["pairwise_diversity"][function]["variance"] = np.var(vector)

    #############################################

    def error_correlation(self, classifier_1, classifier_2, context):
        return np.corrcoef(self.measures[classifier_1]["matrix"]["e"], self.measures[classifier_2]["matrix"]["e"])[0][1]

    #############################################

    def n01(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                        self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if np.sum(a) < len(a) and np.sum(b) == len(b):
                counter += 1
        return counter

    #############################################

    def n10(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                        self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if np.sum(a) == len(a) and np.sum(b) < len(b):
                counter += 1
        return counter

    #############################################

    def n11(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                        self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if (a == b).all() and np.sum(a) == len(a):
                counter += 1
        return counter

    #############################################

    def n00(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                        self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if np.sum(b) < len(b) and np.sum(a) < len(a):
                counter += 1
        return counter

    #############################################

    def _n_values(self, classifier_1, classifier_2, context):
        # These counts can be computed either on exact instance matches or per class
        n11 = None
        n00 = None
        n10 = None
        n01 = None

        if context["results"]["to_file"]["diversity_study"]["exact_match"]:
            n11 = self.n11(classifier_1, classifier_2)
            n00 = self.n00(classifier_1, classifier_2)
            n10 = self.n10(classifier_1, classifier_2)
            n01 = self.n01(classifier_1, classifier_2)

        elif context["results"]["to_file"]["diversity_study"]["by_class"]:
            # TODO: change this part
            for i in range(len(self.measures[classifier_1]["matrix"]["oracle_outputs"][0])):
                n11 = sum([1 if x == y and x == 1 else 0 for x, y in
                           zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                               self.measures[classifier_2]["matrix"]["oracle_outputs"])])
                n00 = sum(
                    [1 if x == y and x == 0 else 0 for x, y in
                     zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                         self.measures[classifier_2]["matrix"]["oracle_outputs"])])
                n01 = sum([1 if x != y and x == 0 and y == 1 else 0 for x, y in
                           zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                               self.measures[classifier_2]["matrix"]["oracle_outputs"])])
                n10 = sum([1 if x != y and x == 1 and y == 0 else 0 for x, y in
                           zip(self.measures[classifier_1]["matrix"]["oracle_outputs"],
                               self.measures[classifier_2]["matrix"]["oracle_outputs"])])
        else:
            raise ValueError("No option selected in diversity study: by class or by exact match")

        return {"N11": n11, "N00": n00, "N01": n01, "N10": n10}

    #############################################

    def interrater_agreement_k(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = ((values["N11"] + values["N10"]) * (values["N01"] + values["N00"])) + \
                      ((values["N11"] + values["N01"]) * (values["N10"] + values["N00"]))
        numerator = 2 * ((values["N11"] * values["N00"]) - (values["N01"] * values["N10"]))
        if not denominator:
            denominator = 1
        return numerator / denominator

    #############################################

    def q_statistic(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = values["N11"] * values["N00"] + values["N01"] * values["N10"]
        if not denominator:
            denominator = 1
        return (values["N11"] * values["N00"] - values["N01"] * values["N10"]) / denominator

    #############################################
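    # Correlation coefficient:
    #   rho = (N11*N00 - N01*N10) / sqrt((N11+N10)*(N01+N00)*(N11+N01)*(N10+N00));
    # rho has the same sign as Q, and it can be shown that |rho| <= |Q|.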

    def coefficient_p(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = np.sqrt((values["N11"] + values["N10"]) * (values["N01"] + values["N00"]) * (
            values["N11"] + values["N01"]) * (values["N10"] + values["N00"]))
        if not denominator:
            denominator = 1
        return (values["N11"] * values["N00"] - values["N01"] * values["N10"]) / denominator

    #############################################
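    # Disagreement measure: D = (N01 + N10) / (N11 + N10 + N01 + N00), the fraction of
    # instances on which exactly one of the two classifiers is correct.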

    def disagreement(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = values["N11"] * values["N00"] + values["N01"] + values["N10"]
        if not denominator:
            denominator = 1
        return (values["N01"] + values["N10"]) / denominator

    #############################################
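    # Double fault: DF = N00 / (N11 + N10 + N01 + N00), the fraction of instances
    # misclassified by both classifiers at the same time.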

    def double_fault(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = values["N11"] + values["N10"] + values["N01"] + values["N00"]
        if not denominator:
            denominator = 1
        return values["N00"] / denominator

    ################################################################

    def configuration_evaluation(self, context, classifier_name, information):
        """
        To be reconstructed into an abstraction model. Initialize the information of each classifier.
        """
        #information_class.automatic_threshold_determine(context,classifier_name)
        pattern_kind = "validation"
        self.rms(classifier_name, context, information, pattern_kind)

        name = classifier_name[:re.search(r'[A-Za-z]+[0-9]*', classifier_name).end()]
        neurons = context["classifiers"][classifier_name]["configuration"]["neurons"][0]

        if len(self.measures[name]["evaluation"][neurons].keys()):
            self.measures[name]["evaluation"][neurons]['rms'].append(
                self.measures[classifier_name]['rms'][pattern_kind])
            self.measures[name]["evaluation"][neurons]['names'].append(classifier_name)
        else:
            self.measures[name]["evaluation"][neurons]['rms'] = []
            self.measures[name]["evaluation"][neurons]['rms'].append(
                self.measures[classifier_name]['rms'][pattern_kind])
            self.measures[name]["evaluation"][neurons]['names'] = []
            self.measures[name]["evaluation"][neurons]['names'].append(classifier_name)

    ####################################################

    def best_choice(self):
        """
        Select the best configuration of a NN classifier with the class attributes information.
        """
        for name in sorted([x for x in self.measures.keys() if "evaluation" in self.measures[x].keys()]):
            self.measures[name]["selection"]["rms"] = [99999.0]
            self.measures[name]["selection"]["neurons"]["hidden"] = [0]
            self.measures[name]["selection"]["name"] = [""]

            for neuron in sorted(self.measures[name]["evaluation"].keys()):
                self.measures[name]["selection"]["neurons"][neuron]["amount"] = 0
                rms_list, names_list = (list(t) for t in zip(*sorted(zip(self.measures[
                                                                             name]["evaluation"][neuron]['rms'],
                                                                         self.measures[name]["evaluation"][neuron][
                                                                             'names']))))

                mean_rms = np.mean(self.measures[name]["evaluation"][neuron]['rms'])

                if mean_rms < self.measures[name]["selection"]["rms"][0]:
                    self.measures[name]["selection"]["rms"] = [mean_rms]
                    self.measures[name]["selection"]["neurons"]["hidden"] = [neuron]
                    self.measures[name]["selection"]["neurons"][neuron]["amount"] = 1
                    self.measures[name]["selection"]["names"] = \
                        [self.measures[name]["evaluation"][neuron]['names'][self.measures[name]["evaluation"][neuron][
                            'rms'].index(sorted(
                                self.measures[name]["evaluation"][neuron][
                                    'rms'])[0])]]

                elif mean_rms == self.measures[name]["selection"]["rms"][0]:
                    self.measures[name]["selection"]["rms"].append(mean_rms)
                    self.measures[name]["selection"]["neurons"]["hidden"].append(neuron)
                    for i in range(len(self.measures[name]["evaluation"][neuron]['rms'])):
                        if rms_list[i] == rms_list[0]:
                            self.measures[name]["selection"]["names"].append(names_list[i])
                            self.measures[name]["selection"]["neurons"][neuron]["amount"] += 1

    ################################################################
    @staticmethod
    def pre_forecasting_statistic(context, classifier_name, information, pattern_kind):
        len_classes = len(context["classifiers"][classifier_name]["classes_names"])
        len_inputs = len(context["patterns"].patterns[classifier_name][pattern_kind][0]) - len_classes
        classifier_outputs = information.info[classifier_name]["continuous_outputs"][pattern_kind]
        classifier_patterns = \
            context["patterns"].patterns[classifier_name][pattern_kind][:, (len_inputs - 1, len_inputs)]
        len_patterns = len(context["patterns"].patterns[classifier_name][pattern_kind])
        d_change_pred = np.zeros(len_patterns)
        d_change_true = np.zeros(len_patterns)

        for i, instance, outputs in zip(range(len_patterns), classifier_patterns, classifier_outputs):
            d_change_true[i] = instance[1] - instance[0]
            d_change_pred[i] = outputs[0] - instance[0]
        return d_change_pred, d_change_true

    ################################################################
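    # Note: d_change_true is the realized change between the last input column (used as the
    # baseline) and the first target column of each pattern, while d_change_pred is the change
    # implied by the classifier's first continuous output relative to the same baseline;
    # tendency_accuracy and mase below are both computed from these two arrays.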

    def tendency_accuracy(self, classifier_name, context, information, pattern_kind):
        """
        Calculates the fraction of tendency hits (correctly predicted direction of change) on a regression problem.
        The regression tolerance is a parameter added to avoid errors due to numerical overflow.
        :param classifier_name:
        :param context:
        :param information:
        :param pattern_kind:
        :return:
        """
        array_change_pred, array_change_true = self.pre_forecasting_statistic(context,
                                                                               classifier_name,
                                                                               information,
                                                                               pattern_kind)
        hits = np.zeros(len(array_change_pred))
        for i, d_change_pred, d_change_true in zip(range(len(array_change_pred)), array_change_pred, array_change_true):
            if d_change_pred * d_change_true > 0.0:
                hits[i] = 1.
            elif d_change_pred * d_change_true == 0.0:
                hits[i] = 1.
            else:
                if np.sqrt(np.abs(d_change_pred * d_change_true)) < context["regression_tolerance_tendency"]:
                    hits[i] = 1.
                else:
                    hits[i] = 0.

        self.measures[classifier_name]["tendency_accuracy"] = np.mean(hits)

    #########################################################################################

    def mase(self, classifier_name, context, information, pattern_kind):
        """
        MASE-style ratio as implemented here: the mean absolute predicted change divided by the
        mean absolute true change, both taken from pre_forecasting_statistic. Values below 1.0
        indicate that the predicted changes are, on average, smaller than the observed changes.
        :param classifier_name:
        :param context:
        :param information:
        :param pattern_kind:
        :return:
        """
        array_change_pred, array_change_true = self.pre_forecasting_statistic(context, classifier_name,
                                                                              information, pattern_kind)

        self.measures[classifier_name]["mase"] = np.divide(np.mean(np.absolute(array_change_pred)),
                                                           np.mean(np.absolute(array_change_true)))
Exemplo n.º 14
0
    def __init__(self):
        """
        Initialize the internal structure as AutoVivification class
        """
        self.measures = AutoVivification()
Exemplo n.º 15
0
class Statistics:
    """
    The class where all the statistics functions are defined, such as goodness, standard deviation or mean squared error.
    All the information relative to the classifiers is saved in the class structure, indexed by classifier name.
    """
    def __init__(self):
        """
        Initialize the internal structure as AutoVivification class
        """
        self.measures = AutoVivification()

    #####################################################
    @staticmethod
    def change_ranges(value, **kwargs):
        """
        Project a given value from an old range onto a new range
        """
        if len(kwargs.keys()) != 4:
            raise ValueError("Change ranges need 4 parameters")

        old_min = kwargs["oldMin"]
        old_max = kwargs["oldMax"]
        new_max = kwargs["newMax"]
        new_min = kwargs["newMin"]

        old_range = old_max - old_min
        new_range = new_max - new_min
        old_value = value

        return (((old_value - old_min) * new_range) / old_range) + new_min
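
    # Example (illustrative): change_ranges(0.0, oldMin=-1, oldMax=1, newMin=0, newMax=1)
    # returns 0.5, projecting a value from the [-1, 1] range onto [0, 1].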

    #############################################
    def rms(self, classifier_name, context, information, pattern_kind):
        """
        Calculate the RMS error of the classifier for the given pattern kind.
        """
        list_outputs_classifier = information.info[classifier_name][
            "continuous_outputs"][pattern_kind]
        self.measures[classifier_name]["rms"][pattern_kind] = 0.0
        pattern = copy.deepcopy(
            context["patterns"].patterns[classifier_name][pattern_kind])
        #Difference between desired outputs(patterns) and the real outputs
        classes_texts = context["classifiers"][classifier_name][
            "classes_names"]
        len_inputs = len(pattern[0]) - len(classes_texts)
        for outputs, desired in zip(list_outputs_classifier, pattern):
            if context["classifiers"][classifier_name]["patterns"][
                    "range"] is not [0, 1]:
                for i, desire in enumerate(desired[len_inputs:]):
                    desired[len_inputs:][i] = \
                        self.change_ranges(
                            desire,
                            oldMin=context["classifiers"][classifier_name]["patterns"]["range"][0],
                            oldMax=context["classifiers"][classifier_name]["patterns"]["range"][1],
                            newMin=0,
                            newMax=1)

            self.measures[classifier_name]["rms"][pattern_kind] += sum(
                0.5 * (desired[len_inputs:] - outputs)**2)
        self.measures[classifier_name]["rms"][pattern_kind] /= float(
            len(pattern))
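
    # Note: the RMS computed here is (1/N) * sum over patterns of 0.5 * sum_c (desired_c - output_c)^2,
    # i.e. the mean half sum-of-squares error, with the desired outputs re-projected onto [0, 1]
    # whenever the pattern range differs from [0, 1].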

    #############################################

    @staticmethod
    def discretize_outputs(value):
        """
        Used like a lambda function: map a -1 output to 0, leaving other values unchanged.
        """
        if value == -1:
            return 0.
        return value

    #############################################

    def initialize_goodness(self, context, classifier_name, instances_number,
                            classes_names):
        #Initialize the structure of goodness values.
        for values_kind in ['fp', 'fn', 'tp', 'tn']:
            self.measures[classifier_name]["matrix"][values_kind] = \
                np.zeros([instances_number, len(classes_names)], dtype=np.float16)

            self.measures[classifier_name][values_kind] = 0.0
            for class_text in classes_names:
                self.measures[classifier_name][class_text][values_kind] = 0.0

    #############################################

    def build_list_oracle_outputs(self, classifier_name):
        self.measures[classifier_name]["matrix"]["oracle_outputs"] = \
            self.measures[classifier_name]["matrix"]["tp"] + self.measures[classifier_name]["matrix"]["tn"]

    #############################################

    def goodness(self, context, classifier_name, list_outputs_classifier,
                 pattern_outputs):
        """
        Calculate the goodness of the classifier. It contains an error formula that penalizes instances
        with a single class more heavily, and instances with several classes present less heavily.
        It is a generalization to the multiclass problem.
        The goodness is calculated in terms of FP, FN, TP, TN and different kinds of error, such as the global error,
        the false positive error and the false negative error.
        """
        #TODO:Change the input parameters from list outputs and patterns to Information
        if not len(pattern_outputs):
            raise NameError(
                'Statistics did not get the patterns of the classifier %s correctly at dir %s'
                %
                (classifier_name,
                 context["classifiers"][classifier_name]["paths"]["patterns"]))
        if not len(list_outputs_classifier):
            raise NameError(
                'Statistics did not get the outputs of the classifier %s correctly'
                % classifier_name)
        if len(list_outputs_classifier) != len(pattern_outputs):
            raise NameError(
                'Different lengths in patterns and outputs on classifier %s' %
                classifier_name)

        #############################################
        #To improve code readability
        classes_names = context["classifiers"][classifier_name][
            "classes_names"]
        instances_number = float(len(pattern_outputs))
        len_inputs = len(pattern_outputs[0]) - len(classes_names)

        self.initialize_goodness(context, classifier_name,
                                 int(instances_number), classes_names)

        #############################################
        #Measure the error by instance
        for instance in range(int(instances_number)):
            #Number of classes present in an instance. For multilabel problems
            for output_index, class_text in enumerate(classes_names):
                output_wanted = pattern_outputs[instance][len_inputs:][
                    output_index]
                output = list_outputs_classifier[instance][output_index]

                if output == (-1.):
                    output = 0.
                if output_wanted == (-1.):
                    output_wanted = 0.

                #If there is an error
                if output_wanted != output:
                    #If the wanted output was activated, it means a FN
                    if output_wanted == 1.0:
                        #FN
                        self.measures[classifier_name]["matrix"]['fn'][
                            instance][output_index] = 1.
                    else:
                        # If the wanted output was not activated, it means a FP
                        self.measures[classifier_name]["matrix"]['fp'][
                            instance][output_index] = 1.
                #No error
                else:
                    #TP
                    if output_wanted == 1.0:
                        self.measures[classifier_name]["matrix"]['tp'][
                            instance][output_index] = 1.
                    #TN
                    else:
                        self.measures[classifier_name]["matrix"]['tn'][
                            instance][output_index] = 1.
        #############################################
        #The goodness values in terms of sum of the instances
        for good in self.measures[classifier_name]["matrix"].keys():
            self.measures[classifier_name][good] = np.sum(
                self.measures[classifier_name]["matrix"][good])
            for output_index, class_text in enumerate(classes_names):
                self.measures[classifier_name][class_text][good] = \
                    np.sum(self.measures[classifier_name]["matrix"][good], 0)[output_index]
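
    # Note: the per-instance matrices built above have shape (instances, classes);
    # the per-class counters are their column sums and the global counters their full sums.
    # build_list_oracle_outputs combines tp + tn into a 0/1 "oracle" matrix that marks every
    # correctly resolved (instance, class) decision, which the diversity measures rely on.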

    #########################################################################################

    def error_fn(self, classifier_name, context, information, pattern_kind):
        pattern_outputs = context["patterns"].patterns[classifier_name][
            pattern_kind]
        classes_names = context["classifiers"][classifier_name][
            "classes_names"]

        self.measures[classifier_name]["error_fn"] = 0.0
        for class_text in classes_names:
            self.measures[classifier_name][class_text]["error_fn"] = 0.0

        for output_index, class_text in enumerate(classes_names):
            num_instances_of_the_class = np.sum([
                self.measures[classifier_name]["matrix"]['tp'][i][output_index]
                +
                self.measures[classifier_name]["matrix"]['fn'][i][output_index]
                for i in range(len(pattern_outputs))
            ])

            #The error depends on the number of instances of its class and on the total number of classes
            if len(classes_names) == 1:
                self.measures[classifier_name][class_text]["error_fn"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["fn"], 0)[output_index] / \
                    num_instances_of_the_class
                self.measures[classifier_name][class_text]["error_fn"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["fn"], 0)[output_index] / \
                    (float(len(pattern_outputs)) - num_instances_of_the_class)
            else:
                self.measures[classifier_name][class_text]["error_fn"] = \
                    (np.sum(self.measures[classifier_name]["matrix"]["fn"], 0)[output_index] /
                     num_instances_of_the_class) / len(classes_names)

        for class_text in classes_names:
            self.measures[classifier_name]["error_fn"] += self.measures[
                classifier_name][class_text]["error_fn"]

    #########################################################################################

    def error_fp(self, classifier_name, context, information, pattern_kind):
        pattern_outputs = context["patterns"].patterns[classifier_name][
            pattern_kind]
        classes_names = context["classifiers"][classifier_name][
            "classes_names"]

        self.measures[classifier_name]["error_fp"] = 0.0
        for class_text in classes_names:
            self.measures[classifier_name][class_text]["error_fp"] = 0.0

        for output_index, class_text in enumerate(classes_names):
            num_instances_of_the_class = np.sum([
                self.measures[classifier_name]["matrix"]['tp'][i][output_index]
                +
                self.measures[classifier_name]["matrix"]['fn'][i][output_index]
                for i in range(len(pattern_outputs))
            ])

            #The error depends on the number of instances of its class and on the total number of classes
            if len(classes_names) == 1:
                self.measures[classifier_name][class_text]["error_fp"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["FP"], 0)[output_index] / \
                    num_instances_of_the_class
                self.measures[classifier_name][class_text]["error_fp"] = \
                    0.5 * np.sum(self.measures[classifier_name]["matrix"]["FP"], 0)[output_index] / \
                    (float(len(pattern_outputs)) - num_instances_of_the_class)
            else:
                self.measures[classifier_name][class_text]["error_fp"] = \
                    (np.sum(self.measures[classifier_name]["matrix"]["FP"], 0)[output_index] /
                     num_instances_of_the_class) / len(classes_names)

        for class_text in classes_names:
            self.measures[classifier_name]["error_fp"] += self.measures[
                classifier_name][class_text]["error_fp"]

    #########################################################################################

    def error(self, classifier_name, context, information, pattern_kind):
        """
        Calculate the errors of the classifier given by name.
        This error compensates for the minority class by dividing each class error by the number of instances
        of that class, and finally dividing by the number of classes.
        """
        self.error_fp(classifier_name, context, information, pattern_kind)
        self.error_fn(classifier_name, context, information, pattern_kind)

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            self.measures[classifier_name][class_text]["error"] = \
                self.measures[classifier_name][class_text]["error_fp"] + \
                self.measures[classifier_name][class_text]["error_fn"]

        self.measures[classifier_name]["error"] = \
            self.measures[classifier_name]["error_fp"] + self.measures[classifier_name]["error_fn"]

    #########################################################################################

    def balanced_accuracy(self, classifier_name, context, *args):
        self.tnr(classifier_name, context)
        self.tpr(classifier_name, context)

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            self.measures[classifier_name][class_text]["balanced_accuracy"] = \
                (self.measures[classifier_name][class_text]["tpr"] +
                 self.measures[classifier_name][class_text]["tnr"]) / 2.

        self.measures[classifier_name]["balanced_accuracy"] = \
            np.mean([self.measures[classifier_name][x]["balanced_accuracy"] for x in
                     context["classifiers"][classifier_name]["classes_names"]])

    #########################################################################################

    def g_means(self, classifier_name, context, *args):
        """
        Geometric mean as the sqrt of the sensitivity*specificity
        """
        self.tnr(classifier_name, context)
        self.tpr(classifier_name, context)

        self.measures[classifier_name]["g_means"] = np.sqrt(
            np.dot(self.measures[classifier_name]["tnr"],
                   self.measures[classifier_name]["tpr"]))

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            self.measures[classifier_name][class_text]["g_means"] = \
                np.sqrt(np.dot(self.measures[classifier_name][class_text]["tnr"],
                               self.measures[classifier_name][class_text]["tpr"]))

    #########################################################################################

    def tnr(self, classifier_name, context, *args):
        """
        True Negative Rate
        """
        fp = self.measures[classifier_name]["fp"]
        tn = self.measures[classifier_name]["tn"]

        if tn + fp > 0:
            self.measures[classifier_name]["tnr"] = np.divide(tn, tn + fp)
        else:
            self.measures[classifier_name]["tnr"] = 0.0

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            fp = self.measures[classifier_name][class_text]["fp"]
            tn = self.measures[classifier_name][class_text]["tn"]

            if tn + fp > 0:
                self.measures[classifier_name]["tnr"] = np.divide(tn, tn + fp)
            else:
                self.measures[classifier_name]["tnr"] = 0.0

    #########################################################################################

    def tpr(self, classifier_name, context, *args):
        """
        True Positive Rate
        """
        tp = self.measures[classifier_name]["tp"]
        fn = self.measures[classifier_name]["fn"]

        if tp + fn > 0:
            self.measures[classifier_name]["tpr"] = np.divide(tp, tp + fn)
        else:
            self.measures[classifier_name]["tpr"] = 0.0

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            tp = self.measures[classifier_name][class_text]["tp"]
            fn = self.measures[classifier_name][class_text]["fn"]

            if tp + fn > 0:
                self.measures[classifier_name]["tpr"] = np.divide(tp, tp + fn)
            else:
                self.measures[classifier_name]["tpr"] = 0.0

    #########################################################################################
    @staticmethod
    def get_ytrue_ypred(context, information, classifier_name, pattern_kind):
        len_classes = len(context["classifiers"][context["classifier_list"][0]]
                          ["classes_names"])
        len_inputs = len(context["patterns"].patterns[classifier_name]
                         [pattern_kind][0]) - len_classes
        y_true = list(context["patterns"].patterns[classifier_name]
                      [pattern_kind][:,
                                     range(len_inputs, len_inputs +
                                           len_classes)])
        y_pred = information.info[classifier_name]["continuous_outputs"][
            pattern_kind]
        return y_true, y_pred

    #########################################################################################

    def explained_variance_score(self, classifier_name, context, information,
                                 pattern_kind):
        from sklearn.metrics import explained_variance_score

        y_true, y_pred = self.get_ytrue_ypred(context, information,
                                              classifier_name, pattern_kind)
        self.measures[classifier_name]["explained_variance_score"] = \
            explained_variance_score(y_true, y_pred)

    #########################################################################################

    def mean_absolute_error(self, classifier_name, context, information,
                            pattern_kind):
        from sklearn.metrics import mean_absolute_error

        y_true, y_pred = self.get_ytrue_ypred(context, information,
                                              classifier_name, pattern_kind)
        self.measures[classifier_name]["explained_variance_score"] = \
            mean_absolute_error(y_true, y_pred)

    #########################################################################################

    def mean_squared_error(self, classifier_name, context, information,
                           pattern_kind):
        from sklearn.metrics import mean_squared_error

        y_true, y_pred = self.get_ytrue_ypred(context, information,
                                              classifier_name, pattern_kind)
        self.measures[classifier_name]["mean_squared_error"] = \
            mean_squared_error(y_true, y_pred)

    #########################################################################################

    def r2_score(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import r2_score

        y_true, y_pred = self.get_ytrue_ypred(context, information,
                                              classifier_name, pattern_kind)
        self.measures[classifier_name]["r2_score"] = \
            r2_score(y_true, y_pred)

    #########################################################################################
    @staticmethod
    def confusion_matrix(classifier_name, context, information, pattern_kind):
        from sklearn.metrics import confusion_matrix

        return confusion_matrix(
            context["patterns"].patterns[classifier_name][pattern_kind],
            information.info[classifier_name]["discretized_outputs"]
            [pattern_kind],
            labels=context["classifiers"][classifier_name]["classes_names"])

    #########################################################################################
    @staticmethod
    def matthews_corrcoef(classifier_name, context, information, pattern_kind):
        from sklearn.metrics import matthews_corrcoef

        return matthews_corrcoef(
            context["patterns"].patterns[classifier_name][pattern_kind],
            information.info[classifier_name]["discretized_outputs"]
            [pattern_kind])

    #########################################################################################

    def hamming_loss(self, classifier_name, context, information,
                     pattern_kind):
        from sklearn.metrics import hamming_loss

        self.measures[classifier_name]["hamming_loss"] = \
            hamming_loss(
                context["patterns"].patterns[classifier_name][pattern_kind],
                information.info[classifier_name]["discretized_outputs"][pattern_kind])

    #########################################################################################

    def kappa(self, classifier_name, *args):
        self.measures[classifier_name]["kappa"] = \
            self.measures[classifier_name]["matrix"]['tp'] + self.measures[classifier_name]["matrix"]['tn']

    #########################################################################################

    def f_measure(self, classifier_name, *args):
        self.recall(classifier_name, *args)
        self.accuracy(classifier_name, *args)

        self.measures[classifier_name]["f_measure"] = \
            (2 * self.measures[classifier_name]["recall"] * self.measures[classifier_name]["accuracy"]) / \
            (self.measures[classifier_name]["recall"] + self.measures[classifier_name]["accuracy"])

    #########################################################################################

    def accuracy(self, classifier_name, *args):
        self.measures[classifier_name]["accuracy"] = \
            self.measures[classifier_name]['tp'] / (
                self.measures[classifier_name]['tp'] + self.measures[classifier_name]['fp'])
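
    # Note: "accuracy" above is computed as tp / (tp + fp), i.e. what is usually called
    # precision, so f_measure reduces to the standard F1 = 2*P*R / (P + R) with the recall
    # taken from the true positive rate.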

    #########################################################################################

    def error_rate(self, classifier_name, *args):
        self.measures[classifier_name]["error_rate"] = \
            self.measures[classifier_name]["matrix"]['tp'] + self.measures[classifier_name]["matrix"]['tn'] / \
                                                             (np.sum([self.measures[classifier_name]["matrix"][goodness]
                                                                      for goodness in
                                                                      self.measures[classifier_name]["matrix"].keys()]))

    #########################################################################################

    def accuracy_rate(self, classifier_name, *args):
        self.measures[classifier_name]["accuracy_rate"] = \
            self.measures[classifier_name]["matrix"]['fp'] + self.measures[classifier_name]["matrix"]['fn'] / \
                                                             (np.sum(
                                                                 [self.measures[classifier_name]["matrix"][goodness] for
                                                                  goodness in
                                                                  self.measures[classifier_name]["matrix"].keys()]))

    #########################################################################################

    def recall(self, classifier_name, *args):
        self.tpr(classifier_name, *args)
        self.measures[classifier_name]["recall"] = self.measures[
            classifier_name]["tpr"]

    #########################################################################################

    def fn_rate(self, classifier_name, *args):
        self.measures[classifier_name][
            "fn_rate"] = self.measures[classifier_name]["matrix"]['fn'] / (
                self.measures[classifier_name]["matrix"]['tp'] +
                self.measures[classifier_name]["matrix"]['fn'])

    #########################################################################################

    def fp_rate(self, classifier_name, *args):
        self.measures[classifier_name][
            "fp_rate"] = self.measures[classifier_name]["matrix"]['fp'] / (
                self.measures[classifier_name]["matrix"]['tn'] +
                self.measures[classifier_name]["matrix"]['fp'])

    #########################################################################################

    def auc(self, classifier_name, context, information, pattern_kind):
        from sklearn.metrics import roc_auc_score

        classes_names = context["classifiers"][classifier_name][
            "classes_names"]
        inputs = len(context["patterns"].patterns[classifier_name]
                     [pattern_kind][0]) - len(classes_names)
        self.measures[classifier_name]["auc"] = 0.0
        for i, class_name in enumerate(classes_names):
            self.measures[classifier_name][class_name]["auc"] = \
                roc_auc_score(context["patterns"].patterns[classifier_name][pattern_kind][:, inputs + i],
                              information.info[classifier_name]["continuous_outputs"][pattern_kind][:, i])
            self.measures[classifier_name]["auc"] += self.measures[
                classifier_name][class_name]["auc"]

        self.measures[classifier_name]["auc"] = np.divide(
            np.mean(self.measures[classifier_name]["auc"]), len(classes_names))
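
    # Note: the AUC stored here is a macro average: roc_auc_score is computed per class against
    # the corresponding target column and the per-class values are averaged over the classes.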

    #########################################################################################

    def std(self, classifier_name, context, *args):
        """
        Calculate the standard deviation of the classifier passed as args, for each kind of error.
        Thus, there is a std for the false positive error, another for the false negative error, etc.
        """

        self.measures[classifier_name]['dt_efp'] = np.std(
            self.measures[classifier_name]["matrix"]['efp'])
        self.measures[classifier_name]['dt_efn'] = np.std(
            self.measures[classifier_name]["matrix"]['efn'])
        self.measures[classifier_name]['dt_e'] = np.std(
            self.measures[classifier_name]["matrix"]['efp'] +
            self.measures[classifier_name]["matrix"]['efn'])

        for output_index, class_text in enumerate(
                context["classifiers"][classifier_name]["classes_names"]):
            self.measures[classifier_name][class_text]['dt_efp'] = \
                np.std(self.measures[classifier_name]["matrix"]["efp"], 0)[output_index]
            self.measures[classifier_name][class_text]['dt_efn'] = \
                np.std(self.measures[classifier_name]["matrix"]["efn"], 0)[output_index]
            self.measures[classifier_name][class_text]['dt_e'] = \
                np.std(self.measures[classifier_name]["matrix"]["e"], 0)[output_index]

    #############################################
    @staticmethod
    def __build_multiple_name(sub_list):
        name = ""
        if type(sub_list) != list:
            for x_tuple in sub_list:
                name = "+".join([x for x in x_tuple])
        else:
            for i, name_i in enumerate(sub_list):
                if i == len(sub_list) - 1:
                    name += name_i
                else:
                    name += name_i + "+"
        return name

    #############################################

    def correctly_classified(self, sub_list):
        correctly_classified = np.zeros(
            len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]))
        for i in range(
                len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"])):
            for j, classifier_name in enumerate(sub_list):
                if (np.array(self.measures[classifier_name]["matrix"]
                             ["oracle_outputs"][i]) == np.ones(
                                 len(self.measures[classifier_name]["matrix"]
                                     ["oracle_outputs"][i]))).all():
                    correctly_classified[i] += 1

        return correctly_classified
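
    # Note: correctly_classified[i] is the number of classifiers in sub_list whose oracle
    # output vector for instance i is all ones, i.e. how many ensemble members classify the
    # instance completely correctly; the non-pairwise diversity measures below are functions
    # of this per-instance count.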

    #############################################

    def interrater_agreement_k_non_pairwise(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        p = np.sum([self.measures[x]['E'] for x in self.measures if 'E' in self.measures[x]]) / \
            (len(sub_list) * len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]))

        for i in range(len(correctly_classified)):
            error += correctly_classified[i] * (len(sub_list) -
                                                correctly_classified[i])

        if p == 0.0:
            p = np.exp(100)

        error /= len(self.measures[sub_list[0]]["matrix"]
                     ["oracle_outputs"]) * (len(sub_list) - 1) * p * (1 - p)
        return 1 - error

    #############################################

    def difficulty(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        mean_errors = np.mean(correctly_classified)
        for i in range(len(correctly_classified)):
            error += np.power((correctly_classified[i] -
                               (correctly_classified[i] / mean_errors)), 2)
        error /= (len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]) *
                  np.power(len(sub_list), 2))
        return 1. - error

    #############################################

    def kohavi_wolpert(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        for i in range(len(correctly_classified)):
            error += correctly_classified[i] * (len(sub_list) -
                                                correctly_classified[i])
        error /= len(sub_list)

        error /= (len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"]) *
                  np.power(len(sub_list), 2))
        return error
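
    # Note: the Kohavi-Wolpert variance is commonly written as
    # KW = (1 / (N * L^2)) * sum_i l_i * (L - l_i), with L classifiers and l_i correct votes on
    # instance i; this implementation applies an additional 1/L factor before that normalisation.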

    #############################################

    def entropy(self, context, sub_list):
        error = 0.0
        correctly_classified = self.correctly_classified(sub_list)
        for i in range(len(correctly_classified)):
            error += (min(correctly_classified[i],
                          len(sub_list) - correctly_classified[i]) /
                      (len(sub_list) - np.ceil(len(sub_list) / 2.)))

        error /= len(self.measures[sub_list[0]]["matrix"]["oracle_outputs"])
        return error
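
    # Note: this is the entropy measure E = (1/N) * sum_i min(l_i, L - l_i) / (L - ceil(L/2)),
    # ranging from 0 (all classifiers agree on every instance) to 1 (maximum disagreement).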

    #############################################

    def diversity_non_pairwise_structure(self, context, function,
                                         classifier_list):
        for i, classifier_name in enumerate(classifier_list):
            if context["interactive"]["activate"]:
                sys.stdout.write(
                    "\r{0}>".format("Completed:%f%%" %
                                    ((float(i) / len(classifier_list)) * 100)))
                sys.stdout.flush()

            # name = self.__build_multiple_name(sub_list)

            self.measures[classifier_name][function] = \
                getattr(self, function)(context, context["classifiers"][classifier_name]["classifiers"])

    #############################################

    def diversity_pairwise_structure(self, context, function, classifier_list):
        for i, classifier_1 in enumerate(classifier_list):
            if context["interactive"]["activate"]:
                sys.stdout.write(
                    "\r{0}>".format("Completed:%f%%" %
                                    ((float(i) / len(classifier_list)) * 100)))
                sys.stdout.flush()
            for classifier_2 in context["classifiers"].keys():

                if "pairwise_diversity" in self.measures[classifier_2].keys() and function in \
                        self.measures[classifier_2][
                            "pairwise_diversity"].keys() and classifier_1 in \
                        self.measures[classifier_2]["pairwise_diversity"][
                            function].keys():

                    self.measures[classifier_1]["pairwise_diversity"][function][classifier_2] = \
                        self.measures[classifier_2]["pairwise_diversity"][function][classifier_1]

                else:
                    self.measures[classifier_1]["pairwise_diversity"][function][classifier_2] = \
                        getattr(self, function)(classifier_1, classifier_2, context)

            vector = [
                self.measures[classifier_1]["pairwise_diversity"][function][x]
                for x in self.measures[classifier_1]["pairwise_diversity"]
                [function].keys() if x != classifier_1
            ]
            self.measures[classifier_1]["pairwise_diversity"][function][
                "mean"] = np.mean(vector)
            self.measures[classifier_1]["pairwise_diversity"][function][
                "median"] = np.median(vector)
            self.measures[classifier_1]["pairwise_diversity"][function][
                "std"] = np.std(vector)
            self.measures[classifier_1]["pairwise_diversity"][function][
                "variance"] = np.var(vector)

    #############################################

    def error_correlation(self, classifier_1, classifier_2, context):
        return np.corrcoef(self.measures[classifier_1]["matrix"]["e"],
                           self.measures[classifier_2]["matrix"]["e"])[0][1]

    #############################################

    def n01(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(
                self.measures[classifier_1]["matrix"]["oracle_outputs"],
                self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if np.sum(a) < len(a) and np.sum(b) == len(b):
                counter += 1
        return counter

    #############################################

    def n10(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(
                self.measures[classifier_1]["matrix"]["oracle_outputs"],
                self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if np.sum(a) == len(a) and np.sum(b) < len(b):
                counter += 1
        return counter

    #############################################

    def n11(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(
                self.measures[classifier_1]["matrix"]["oracle_outputs"],
                self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if (a == b).all() and np.sum(a) == len(a):
                counter += 1
        return counter

    #############################################

    def n00(self, classifier_1, classifier_2):
        counter = 0
        for a, b in zip(
                self.measures[classifier_1]["matrix"]["oracle_outputs"],
                self.measures[classifier_2]["matrix"]["oracle_outputs"]):
            if np.sum(b) < len(b) and np.sum(a) < len(a):
                counter += 1
        return counter

    #############################################

    def _n_values(self, classifier_1, classifier_2, context):
        #this results may be divided
        n11 = None
        n00 = None
        n10 = None
        n01 = None

        if context["results"]["to_file"]["diversity_study"]["exact_match"]:
            n11 = self.n11(classifier_1, classifier_2)
            n00 = self.n00(classifier_1, classifier_2)
            n10 = self.n10(classifier_1, classifier_2)
            n01 = self.n01(classifier_1, classifier_2)

        elif context["results"]["to_file"]["diversity_study"]["by_class"]:
            # TODO: change this part
            for i in range(
                    len(self.measures[classifier_1]["matrix"]["oracle_outputs"]
                        [0])):
                n11 = sum([
                    1 if x == y and x == 1 else 0 for x, y in zip(
                        self.measures[classifier_1]["matrix"]
                        ["oracle_outputs"], self.measures[classifier_2]
                        ["matrix"]["oracle_outputs"])
                ])
                n00 = sum([
                    1 if x == y and x == 0 else 0 for x, y in zip(
                        self.measures[classifier_1]["matrix"]
                        ["oracle_outputs"], self.measures[classifier_2]
                        ["matrix"]["oracle_outputs"])
                ])
                n01 = sum([
                    1 if x != y and x == 0 and y == 1 else 0 for x, y in zip(
                        self.measures[classifier_1]["matrix"]
                        ["oracle_outputs"], self.measures[classifier_2]
                        ["matrix"]["oracle_outputs"])
                ])
                n10 = sum([
                    1 if x != y and x == 1 and y == 0 else 0 for x, y in zip(
                        self.measures[classifier_1]["matrix"]
                        ["oracle_outputs"], self.measures[classifier_2]
                        ["matrix"]["oracle_outputs"])
                ])
        else:
            raise ValueError(
                "No option selected in diversity study: by class or by exact match"
            )

        return {"n11": n11, "N00": n00, "N01": n01, "N10": n10}

    #############################################

    def interrater_agreement_k(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = ((values["N11"] + values["N10"]) * (values["N01"] + values["N00"])) + \
                      ((values["N11"] + values["N01"]) * (values["N10"] + values["N00"]))
        numerator = 2 * ((values["N11"] * values["N00"]) -
                         (values["N01"] * values["N10"]))
        if not denominator:
            denominator = 1
        return numerator / denominator

    #############################################

    def q_statistic(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = values["N11"] * values["N00"] + values["N01"] * values[
            "N10"]
        if not denominator:
            denominator = 1
        return (values["N11"] * values["N00"] -
                values["N01"] * values["N10"]) / denominator

    #############################################

    def coefficient_p(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = np.sqrt(
            (values["N11"] + values["N10"]) * (values["N01"] + values["N00"]) *
            (values["N11"] + values["N01"]) * (values["N10"] + values["N00"]))
        if not denominator:
            denominator = 1
        return (values["N11"] * values["N00"] -
                values["N01"] * values["N10"]) / denominator

    #############################################

    def disagreement(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = values["N11"] * values["N00"] + values["N01"] + values[
            "N10"]
        if not denominator:
            denominator = 1
        return (values["N01"] + values["N10"]) / denominator

    #############################################

    def double_fault(self, classifier_1, classifier_2, context):
        values = self._n_values(classifier_1, classifier_2, context)
        denominator = values["N11"] + values["N10"] + values["N01"] + values[
            "N00"]
        if not denominator:
            denominator = 1
        return values["N00"] / denominator

    ################################################################

    def configuration_evaluation(self, context, classifier_name, information):
        """
        To be reconstructed into an abstraction model. Initialize the information of each classifier.
        """
        #information_class.automatic_threshold_determine(context,classifier_name)
        pattern_kind = "validation"
        self.rms(classifier_name, context, information, pattern_kind)

        name = classifier_name[:re.search(r'[A-Za-z]+[0-9]*', classifier_name).
                               end()]
        neurons = context["classifiers"][classifier_name]["configuration"][
            "neurons"][0]

        if len(self.measures[name]["evaluation"][neurons].keys()):
            self.measures[name]["evaluation"][neurons]['rms'].append(
                self.measures[classifier_name]['rms'][pattern_kind])
            self.measures[name]["evaluation"][neurons]['names'].append(
                classifier_name)
        else:
            self.measures[name]["evaluation"][neurons]['rms'] = []
            self.measures[name]["evaluation"][neurons]['rms'].append(
                self.measures[classifier_name]['rms'][pattern_kind])
            self.measures[name]["evaluation"][neurons]['names'] = []
            self.measures[name]["evaluation"][neurons]['names'].append(
                classifier_name)

    ####################################################

    def best_choice(self):
        """
        Select the best configuration of a NN classifier with the class attributes information.
        """
        for name in sorted([
                x for x in self.measures.keys()
                if "evaluation" in self.measures[x].keys()
        ]):
            self.measures[name]["selection"]["rms"] = [99999.0]
            self.measures[name]["selection"]["neurons"]["hidden"] = [0]
            self.measures[name]["selection"]["name"] = [""]

            for neuron in sorted(self.measures[name]["evaluation"].keys()):
                self.measures[name]["selection"]["neurons"][neuron][
                    "amount"] = 0
                rms_list, names_list = (list(t) for t in zip(*sorted(
                    zip(self.measures[name]["evaluation"][neuron]['rms'],
                        self.measures[name]["evaluation"][neuron]['names']))))

                mean_rms = np.mean(
                    self.measures[name]["evaluation"][neuron]['rms'])

                if mean_rms < self.measures[name]["selection"]["rms"][0]:
                    self.measures[name]["selection"]["rms"] = [mean_rms]
                    self.measures[name]["selection"]["neurons"]["hidden"] = [
                        neuron
                    ]
                    self.measures[name]["selection"]["neurons"][neuron][
                        "amount"] = 1
                    self.measures[name]["selection"]["names"] = \
                        [self.measures[name]["evaluation"][neuron]['names'][self.measures[name]["evaluation"][neuron][
                            'rms'].index(sorted(
                                self.measures[name]["evaluation"][neuron][
                                    'rms'])[0])]]

                elif mean_rms == self.measures[name]["selection"]["rms"][0]:
                    self.measures[name]["selection"]["rms"].append(mean_rms)
                    self.measures[name]["selection"]["neurons"][
                        "hidden"].append(neuron)
                    for i in range(
                            len(self.measures[name]["evaluation"][neuron]
                                ['rms'])):
                        if rms_list[i] == rms_list[0]:
                            self.measures[name]["selection"]["names"].append(
                                names_list[i])
                            self.measures[name]["selection"]["neurons"][
                                neuron]["amount"] += 1

    ################################################################
    @staticmethod
    def pre_forecasting_statistic(context, classifier_name, information,
                                  pattern_kind):
        len_classes = len(
            context["classifiers"][classifier_name]["classes_names"])
        len_inputs = len(context["patterns"].patterns[classifier_name]
                         [pattern_kind][0]) - len_classes
        classifier_outputs = information.info[classifier_name][
            "continuous_outputs"][pattern_kind]
        classifier_patterns = \
            context["patterns"].patterns[classifier_name][pattern_kind][:, (len_inputs - 1, len_inputs)]
        len_patterns = len(
            context["patterns"].patterns[classifier_name][pattern_kind])
        d_change_pred = np.zeros(len_patterns)
        d_change_true = np.zeros(len_patterns)

        for i, instance, outputs in zip(range(len_patterns),
                                        classifier_patterns,
                                        classifier_outputs):
            d_change_true[i] = instance[1] - instance[0]
            d_change_pred[i] = outputs[0] - instance[0]
        return d_change_pred, d_change_true

    ################################################################

    def tendency_accuracy(self, classifier_name, context, information,
                          pattern_kind):
        """
        Calculates the fraction of tendency hits (correctly predicted direction of change) on a regression problem.
        The regression tolerance is a parameter added to avoid errors due to numerical overflow.
        :param classifier_name:
        :param context:
        :param information:
        :param pattern_kind:
        :return:
        """
        array_change_pred, array_change_true = self.pre_forecasting_statistic(
            context, classifier_name, information, pattern_kind)
        hits = np.zeros(len(array_change_pred))
        for i, d_change_pred, d_change_true in zip(
                range(len(array_change_pred)), array_change_pred,
                array_change_true):
            if d_change_pred * d_change_true > 0.0:
                hits[i] = 1.
            elif d_change_pred * d_change_true == 0.0:
                hits[i] = 1.
            else:
                if np.sqrt(np.abs(d_change_pred * d_change_true)
                           ) < context["regression_tolerance_tendency"]:
                    hits[i] = 1.
                else:
                    hits[i] = 0.

        self.measures[classifier_name]["tendency_accuracy"] = np.mean(hits)

    #########################################################################################

    def mase(self, classifier_name, context, information, pattern_kind):
        """
        MASE-style ratio as implemented here: the mean absolute predicted change divided by the
        mean absolute true change, both taken from pre_forecasting_statistic. Values below 1.0
        indicate that the predicted changes are, on average, smaller than the observed changes.
        :param classifier_name:
        :param context:
        :param information:
        :param pattern_kind:
        :return:
        """
        array_change_pred, array_change_true = self.pre_forecasting_statistic(
            context, classifier_name, information, pattern_kind)

        self.measures[classifier_name]["mase"] = np.divide(
            np.mean(np.absolute(array_change_pred)),
            np.mean(np.absolute(array_change_true)))