Python add示例

编程语言: Python

命名空间/包名称: extrap.modelers.modeler_options.modeler_options

方法/功能: add

hotexamples.com的示例: 4

Python add - 已找到4个示例。这些是从开源项目中提取的、最受好评的 extrap.modelers.modeler_options.modeler_options.add 真实Python示例。您可以对示例进行评价，以帮助我们提高示例质量。

示例#1

显示文件

文件： gpu_direct_multi_parameter_modeler.py 项目： Shadowjockey/extrap

class GPUDirectMultiParameterModeler(AbstractMultiParameterModeler,
                                     SingularModeler):
    """
    This class represents the modeler for multi parameter functions.
    In order to create a model measurements at least 5 points are needed.

    The hypothesis search is delegated to the native library
    ``./lib/libcuMppm.so`` (see ``create_model``).
    """

    NAME = 'GPU-Direct-Multi-Parameter'
    single_parameter_modeler: 'SingleParameterModeler'
    use_crossvalidation = modeler_options.add(True,
                                              bool,
                                              'Enables cross-validation',
                                              name='Cross-validation')
    allow_combinations_of_sums_and_products = modeler_options.add(
        True,
        bool,
        description="Allows models that consist of "
        "combinations of sums and products.")
    compare_with_RSS = modeler_options.add(
        False, bool,
        'If enabled the models are compared using their residual sum of squares '
        '(RSS) instead of their symmetric mean absolute percentage error (SMAPE)'
    )

    def __init__(self):
        """
        Initialize GPUDirectMultiParameterModeler object.
        """
        super().__init__(use_median=False,
                         single_parameter_modeler=single_parameter.Default())
        # value for the minimum number of measurement points required for modeling
        self.min_measurement_points = 5
        self.epsilon = 0.0005  # value for the minimum term contribution

    def create_model(self, measurements: Sequence[Measurement]):
        """
        Flatten the measurements and pass them to the native hypothesis search.

        Every measurement contributes one row consisting of its coordinate
        values followed by its mean (or its median when ``use_median`` is
        set). The rows are concatenated into one contiguous ``c_float`` array,
        wrapped in a ``CPUMatrix`` and handed to ``find_hypothesis`` of
        ``./lib/libcuMppm.so``.

        NOTE(review): the hypothesis returned by the library is only printed;
        this method returns None instead of a Model. Confirm whether that is
        intended before relying on this modeler.

        :param measurements: non-empty sequence of measurements to model.
        """
        # matrix width: one column per parameter plus one for the value
        # assumes iterating a coordinate yields exactly `dimensions` values
        # -- TODO confirm
        w = measurements[0].coordinate.dimensions + 1
        h = len(measurements)

        c_measurements = []
        for measurement in measurements:
            row = list(measurement.coordinate)
            row.append(
                measurement.median if self.use_median else measurement.mean)
            c_measurements += row

        # NOTE: the library path is relative to the current working directory
        cu_mppm = CDLL('./lib/libcuMppm.so')
        float_arr = c_float * (h * w)
        # Fill the array with the flattened float values. The Measurement
        # objects themselves cannot be converted to c_float.
        elements = float_arr(*c_measurements)
        elements_ptr = cast(elements, POINTER(c_float))
        cu_mppm.find_hypothesis.restype = CPUHypothesis
        cpu_measurements = CPUMatrix(w, h, elements_ptr)
        hypothesis = cu_mppm.find_hypothesis(byref(cpu_measurements))
        print(hypothesis)

示例#2

显示文件

class SingleParameterModeler(AbstractSingleParameterModeler, SingularModeler):
    """
    This class represents the modeler for single parameter functions.
    In order to create a model measurements at least 5 points are needed.
    The result is either a constant function or one based on the PMNF.
    """

    NAME = 'Basic'
    DESCRIPTION = "Modeler for single-parameter models; traverses the search-space of all defined hypotheses."

    # Every option below rebuilds the hypothesis building blocks when changed.
    allow_log_terms = modeler_options.add(
        True,
        bool,
        'Allows models with logarithmic terms',
        on_change=lambda self, v: self._exponents_changed())
    poly_exponents = modeler_options.add(
        '',
        str,
        'Set of polynomial exponents. Use comma separated list.',
        name='Polynomial',
        on_change=lambda self, v: self._exponents_changed())
    log_exponents = modeler_options.add(
        '',
        str,
        'Set of logarithmic exponents. Use comma separated list.',
        name='Logarithmic',
        on_change=lambda self, v: self._exponents_changed())
    retain_default_exponents = modeler_options.add(
        False,
        bool,
        'If set the default exponents are added to the given ones.',
        name='Retain default',
        on_change=lambda self, v: self._exponents_changed())
    force_combination_exponents = modeler_options.add(
        False,
        bool,
        'If set the exact combination of exponents is forced.',
        name='Force combination',
        on_change=lambda self, v: self._exponents_changed())
    allow_negative_exponents = modeler_options.add(
        False,
        bool,
        'If set adds negative exponents for strong scaling.',
        name='Negative exponents',
        on_change=lambda self, v: self._exponents_changed())
    modeler_options.group('Exponents', poly_exponents, log_exponents,
                          retain_default_exponents,
                          force_combination_exponents,
                          allow_negative_exponents)

    def __init__(self):
        """
        Initialize SingleParameterModeler object.
        """
        super().__init__(use_median=False)

        # value for the minimum number of measurement points required for modeling
        self.min_measurement_points = 5

        # create the building blocks for the hypothesis
        self.hypotheses_building_blocks: List[
            CompoundTerm] = self.create_default_building_blocks(
                self.allow_log_terms, self.allow_negative_exponents)

    def _exponents_changed(self):
        """
        Rebuild ``hypotheses_building_blocks`` from the current option values.

        If at least one user-defined exponent could be parsed, the building
        blocks are generated from the user-defined exponents (optionally
        extended by the defaults); otherwise the defaults are used.
        """

        def parse_expos(expos):
            """Parse a comma separated string into a list of int/float exponents."""
            result = []
            for e in expos.split(','):
                try:
                    result.append(float(e) if '.' in e else int(e))
                except ValueError:
                    # Best effort: entries that are not numbers (including
                    # the empty string of an unset option) are skipped.
                    pass
            return result

        polyexpos = parse_expos(self.poly_exponents)
        logexpos = parse_expos(self.log_exponents)

        if polyexpos or logexpos:
            self.hypotheses_building_blocks = self.generate_building_blocks(
                polyexpos, logexpos, self.force_combination_exponents)
            if self.retain_default_exponents:
                self.hypotheses_building_blocks.extend(
                    self.create_default_building_blocks(
                        self.allow_log_terms, self.allow_negative_exponents))
        else:
            self.hypotheses_building_blocks = self.create_default_building_blocks(
                self.allow_log_terms, self.allow_negative_exponents)

    def get_matching_hypotheses(self, measurements: Sequence[Measurement]):
        """Removes log terms from the returned hypotheses_building_blocks, if those cannot describe the measurements."""

        if self.are_measurements_log_capable(measurements,
                                             self.allow_negative_exponents):
            return self.hypotheses_building_blocks

        # keep only compound terms without any logarithmic simple term
        return [
            compound_term for compound_term in self.hypotheses_building_blocks
            if not any(t.term_type == "logarithm"
                       for t in compound_term.simple_terms)
        ]

    @staticmethod
    def create_default_building_blocks(allow_log_terms,
                                       allow_negative_exponents=False):
        """
        Creates the default building blocks for the single parameter hypothesis
        that will be used during the search for the best hypothesis.

        Each tuple is unpacked into ``CompoundTerm.create``; presumably it
        encodes (exponent numerator, exponent denominator, logarithm
        exponent) -- verify against ``CompoundTerm.create``.

        :param allow_log_terms: include tuples with a non-zero third entry.
        :param allow_negative_exponents: additionally include the tuples with
            negative entries.
        :return: list of compound terms, in the order of the tables below.
        """

        if allow_log_terms:
            exponents = [(0, 1, 1), (0, 1, 2), (1, 4, 0), (1, 3, 0), (1, 4, 1),
                         (1, 3, 1), (1, 4, 2), (1, 3, 2), (1, 2, 0), (1, 2, 1),
                         (1, 2, 2), (2, 3, 0), (3, 4, 0), (2, 3, 1), (3, 4, 1),
                         (4, 5, 0), (2, 3, 2), (3, 4, 2), (1, 1, 0), (1, 1, 1),
                         (1, 1, 2), (5, 4, 0), (5, 4, 1), (4, 3, 0), (4, 3, 1),
                         (3, 2, 0), (3, 2, 1), (3, 2, 2), (5, 3, 0), (7, 4, 0),
                         (2, 1, 0), (2, 1, 1), (2, 1, 2), (9, 4, 0), (7, 3, 0),
                         (5, 2, 0), (5, 2, 1), (5, 2, 2), (8, 3, 0),
                         (11, 4, 0), (3, 1, 0), (3, 1, 1)]
            # These were used for relearn
            if allow_negative_exponents:
                exponents += [
                    (-0, 1, -1), (-0, 1, -2), (-1, 4, -1), (-1, 3, -1),
                    (-1, 4, -2), (-1, 3, -2), (-1, 2, -1), (-1, 2, -2),
                    (-2, 3, -1), (-3, 4, -1), (-2, 3, -2), (-3, 4, -2),
                    (-1, 1, -1), (-1, 1, -2), (-5, 4, -1), (-4, 3, -1),
                    (-3, 2, -1), (-3, 2, -2), (-2, 1, -1), (-2, 1, -2),
                    (-5, 2, -1), (-5, 2, -2), (-3, 1, -1)
                ]

        else:
            exponents = [(1, 4, 0), (1, 3, 0), (1, 2, 0), (2, 3, 0), (3, 4, 0),
                         (4, 5, 0), (1, 1, 0), (5, 4, 0), (4, 3, 0), (3, 2, 0),
                         (5, 3, 0), (7, 4, 0), (2, 1, 0), (9, 4, 0), (7, 3, 0),
                         (5, 2, 0), (8, 3, 0), (11, 4, 0), (3, 1, 0)]
            # These were used for relearn
            if allow_negative_exponents:
                exponents += [(-1, 4, 0), (-1, 3, 0), (-1, 2, 0), (-2, 3, 0),
                              (-3, 4, 0), (-4, 5, 0), (-1, 1, 0), (-5, 4, 0),
                              (-4, 3, 0), (-3, 2, 0), (-5, 3, 0), (-7, 4, 0),
                              (-2, 1, 0), (-9, 4, 0), (-7, 3, 0), (-5, 2, 0),
                              (-8, 3, 0), (-11, 4, 0), (-3, 1, 0)]

        hypotheses_building_blocks = [
            CompoundTerm.create(*e) for e in exponents
        ]
        # print the hypothesis building blocks, compound terms in debug mode
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            parameter = Parameter('p')
            for i, compound_term in enumerate(hypotheses_building_blocks):
                logging.debug(
                    f"Compound term {i}: {compound_term.to_string(parameter)}")

        return hypotheses_building_blocks

    @staticmethod
    def generate_building_blocks(poly_exponents,
                                 log_exponents,
                                 force_combination=False):
        """
        Creates building blocks from user-defined exponents.

        :param poly_exponents: polynomial exponents.
        :param log_exponents: logarithmic exponents.
        :param force_combination: if set, only the pairs
            (polynomial, logarithmic) are generated; otherwise the purely
            polynomial pairs (p, 0) and the purely logarithmic pairs (0, l)
            are generated as well, before the combined ones.
        :return: list with one compound term per generated pair.
        """
        if force_combination:
            exponents = itertools.product(poly_exponents, log_exponents)
        else:
            exponents = itertools.chain(
                itertools.product(poly_exponents, [0]),
                itertools.product([0], log_exponents),
                itertools.product(poly_exponents, log_exponents))

        return [CompoundTerm.create(*e) for e in exponents]

    def build_hypotheses(self, measurements):
        """
        Generator yielding one hypothesis per matching building block.

        The building blocks are filtered by ``get_matching_hypotheses``; each
        remaining compound term is shallow-copied and wrapped into a
        single-parameter function and hypothesis.
        """
        for compound_term in self.get_matching_hypotheses(measurements):
            # create next function that will be analyzed
            next_function = SingleParameterFunction(copy.copy(compound_term))

            # create single parameter hypothesis from function
            yield SingleParameterHypothesis(next_function, self.use_median)

    def create_model(self, measurements: Sequence[Measurement]):
        """
        Create a model for the given callpath and metric using the given data.

        A warning is issued when fewer than ``min_measurement_points``
        measurements are given, but modeling continues regardless.

        :return: Model wrapping the constant hypothesis if its cost is 0,
            otherwise the best hypothesis found by ``find_best_hypothesis``.
        """

        # check if the number of measurements satisfies the requirements of the modeler (>=5)
        if len(measurements) < self.min_measurement_points:
            warnings.warn(
                "Number of measurements for a parameter needs to be at least 5 in order to create a performance model."
            )
            # return None

        # create a constant model
        constant_hypothesis, constant_cost = self.create_constant_model(
            measurements)
        logging.debug("Constant model: " +
                      constant_hypothesis.function.to_string())
        logging.debug("Constant model cost: " + str(constant_cost))

        # use constant model when cost is 0
        if constant_cost == 0:
            logging.debug("Using constant model.")
            return Model(constant_hypothesis)

        # otherwise start searching for the best hypothesis based on the PMNF
        logging.debug("Searching for a single-parameter model.")
        # the constant hypothesis is the initial best candidate
        hypotheses_generator = self.build_hypotheses(measurements)
        best_hypothesis = self.find_best_hypothesis(
            hypotheses_generator, constant_cost, measurements,
            constant_hypothesis)
        return Model(best_hypothesis)

示例#3

显示文件

文件： multi_parameter_modeler.py 项目： extra-p/extrap

class MultiParameterModeler(AbstractMultiParameterModeler, SingularModeler):
    """
    This class represents the modeler for multi parameter functions.
    In order to create a model measurements at least 5 points are needed.
    The result is either a constant function or one based on the PMNF.
    """

    NAME = 'Multi-Parameter'
    DESCRIPTION = "Modeler for multi-parameter models; supports full and sparse modeling."

    single_parameter_point_selection = modeler_options.add('auto', str, range=['auto', 'smallest', 'all'],
                                                           description="Sets the point selection method for creating "
                                                                       "the single-parameter models.")
    allow_combinations_of_sums_and_products = modeler_options.add(True, bool,
                                                                  description="Allows models that consist of "
                                                                              "combinations of sums and products.")
    compare_with_RSS = modeler_options.add(False, bool,
                                           'If enabled the models are compared using their residual sum of squares '
                                           '(RSS) instead of their symmetric mean absolute percentage error (SMAPE)')

    def __init__(self):
        """
        Initialize MultiParameterModeler object.
        """
        super().__init__(use_median=False, single_parameter_modeler=single_parameter.Default())
        # value for the minimum number of measurement points required for modeling
        self.min_measurement_points = 5
        self.epsilon = 0.0005  # value for the minimum term contribution

    def find_best_measurement_points(self, measurements: Sequence[Measurement]):
        """
        Determines the best measurement points for creating the single-parameter models.

        :param measurements: non-empty sequence of multi-parameter measurements.
        :return: one list of single-parameter measurements per dimension.
        """

        def make_measurement(c, ms: Sequence[Measurement]):
            """Combine the measurements ms into one measurement at coordinate c."""
            if len(ms) == 1:
                measurement = copy.copy(ms[0])
                measurement.coordinate = Coordinate(c)
                return measurement

            measurement = Measurement(Coordinate(c), ms[0].callpath, ms[0].metric, None)

            if self.use_median:
                value = np.mean([m.median for m in ms])
            else:
                value = np.mean([m.mean for m in ms])

            measurement.mean = value
            measurement.median = value
            if measurement.mean == 0:
                measurement.maximum = np.mean([m.maximum for m in ms])
                measurement.minimum = np.mean([m.minimum for m in ms])
                measurement.std = np.mean([m.std for m in ms])
            else:
                # average the statistics relative to each mean, then rescale
                # them to the combined mean
                measurement.maximum = np.nanmean([m.maximum / m.mean for m in ms]) * measurement.mean
                measurement.minimum = np.nanmean([m.minimum / m.mean for m in ms]) * measurement.mean
                measurement.std = np.nanmean([m.std / m.mean for m in ms]) * measurement.mean

            return measurement

        dimensions = measurements[0].coordinate.dimensions

        # group all measurements for each dimension, by their coordinates in the other dimensions
        # (presumably as_partial_tuple(p) omits dimension p -- verify against Coordinate)
        dimension_groups = [{} for _ in range(dimensions)]
        for m in measurements:
            for p in range(dimensions):
                dimension_groups[p].setdefault(m.coordinate.as_partial_tuple(p), []).append(m)

        # stays True only if, in every dimension, all groups have the same length
        use_all = True
        result_groups = []
        for p, grp in enumerate(dimension_groups):
            # select the longest groups, which cover the biggest range in each direction
            grp_values = iter(grp.values())
            first_ms = next(grp_values)
            current_max = len(first_ms)
            candidates = [first_ms]
            for ms in grp_values:
                len_ms = len(ms)
                if len_ms > current_max:
                    current_max = len_ms
                    candidates = [ms]
                    use_all = False
                elif len_ms == current_max:
                    candidates.append(ms)
                else:
                    use_all = False

            # regroup the longest groups by their coordinate in the current dimension
            groups = {}
            for c in candidates:
                for m in c:
                    groups.setdefault(m.coordinate[p], []).append(m)

            # remove all measurements from the group which cover not the same range as the initial group
            cms = iter(groups.values())
            first_list = next(cms)
            common_coords = {m.coordinate.as_partial_tuple(p) for m in first_list}
            for g in cms:
                # filter in place, the list object is owned by `groups`
                g[:] = [m for m in g if m.coordinate.as_partial_tuple(p) in common_coords]

            result_groups.append(groups)

        if self.single_parameter_point_selection == 'all' and not use_all:
            warnings.warn(
                "Could not use all measurement points. At least 25 measurements are needed; one for each "
                "combination of parameters.")

        # errstate restores the previous numpy error handling even if
        # combining the measurements raises
        with np.errstate(invalid='ignore'):
            combined_measurements = [[make_measurement(c, ms) for c, ms in grp.items() if ms]
                                     for grp in result_groups]

        return combined_measurements

    @staticmethod
    def find_first_measurement_points(measurements: Sequence[Measurement]):
        """
        This method returns the smallest possible measurements that should be used for creating
        the single-parameter models.

        :param measurements: non-empty sequence of multi-parameter measurements.
        :return: one list of copied measurements per dimension, whose coordinate
            is reduced to the value of that dimension.
        """
        dimensions = measurements[0].coordinate.dimensions
        # start with infinite coordinates, so that the first measurement is always lower
        min_coordinate = [
            Coordinate(float('Inf') for _ in range(dimensions))
            for _ in range(dimensions)
        ]
        candidate_list = [[] for _ in range(dimensions)]
        for m in measurements:
            for p in range(dimensions):
                # presumably is_mostly_equal/is_mostly_lower compare all
                # dimensions except p -- verify against Coordinate
                if m.coordinate.is_mostly_equal(min_coordinate[p], p):
                    m_sp = copy.copy(m)
                    m_sp.coordinate = Coordinate(m.coordinate[p])
                    candidate_list[p].append(m_sp)
                elif m.coordinate.is_mostly_lower(min_coordinate[p], p):
                    # new minimum found: discard the previous candidates
                    candidate_list[p].clear()
                    m_sp = copy.copy(m)
                    m_sp.coordinate = Coordinate(m.coordinate[p])
                    candidate_list[p].append(m_sp)
                    min_coordinate[p] = m.coordinate

        return candidate_list

    def create_model(self, measurements: Sequence[Measurement]):
        """
        Create a multi-parameter model using the given measurements.

        First a single-parameter model is created for every parameter. The
        first compound term of each of these models is then combined into
        multi-parameter functions, of which the one with the lowest SMAPE (or
        RSS, if ``compare_with_RSS`` is set) is selected.

        :param measurements: non-empty sequence of measurements.
        :return: Model wrapping the selected hypothesis.
        """
        if self.single_parameter_point_selection in ('auto', 'all'):
            measurements_sp = self.find_best_measurement_points(measurements)
        else:
            # use the first base points found for each parameter for modeling of the single parameter functions
            measurements_sp = self.find_first_measurement_points(measurements)

        # model all single parameter experiments using only the selected points from the step before
        models = self.single_parameter_modeler.model(measurements_sp)
        functions = [m.hypothesis.function for m in models]

        # check if the number of measurements satisfies the requirements of the modeler (>=5)
        if len(measurements) < self.min_measurement_points:
            warnings.warn("Number of measurements for each parameter needs to be at least 5"
                          " in order to create a performance model.")
            # return None

        # use all available additional points for modeling the multi-parameter models
        # mean of all values and the squared error of that constant model
        measurement_count = float(len(measurements))
        mean_model = 0
        for m in measurements:
            mean_model += m.value(self.use_median) / measurement_count
        constant_cost = 0
        for m in measurements:
            deviation = m.value(self.use_median) - mean_model
            constant_cost += deviation * deviation

        # find out which parameters should be kept
        compound_term_pairs = []
        for i, function in enumerate(functions):
            terms = function.compound_terms
            if len(terms) > 0:
                # NOTE: this modifies the term of the single-parameter model in place
                compound_term = terms[0]
                compound_term.coefficient = 1
                compound_term_pairs.append((i, compound_term))

        # see if the function is constant
        if len(compound_term_pairs) == 0:
            constant_function = ConstantFunction()
            constant_function.constant_coefficient = mean_model
            constant_hypothesis = ConstantHypothesis(constant_function, self.use_median)
            constant_hypothesis.compute_cost(measurements)
            return Model(constant_hypothesis)

        # in case is only one parameter, make a single parameter function
        if len(compound_term_pairs) == 1:
            _, compound_term = compound_term_pairs[0]
            multi_parameter_function = MultiParameterFunction()
            multi_parameter_term = MultiParameterTerm(compound_term_pairs[0])
            multi_parameter_term.coefficient = compound_term.coefficient
            multi_parameter_function.add_compound_term(multi_parameter_term)
            multi_parameter_hypothesis = MultiParameterHypothesis(multi_parameter_function, self.use_median)
            multi_parameter_hypothesis.compute_coefficients(measurements)
            multi_parameter_hypothesis.compute_cost(measurements)
            return Model(multi_parameter_hypothesis)

        # create multiplicative multi parameter term
        mult = MultiParameterTerm(*compound_term_pairs)

        # create additive multi parameter terms
        add = [MultiParameterTerm(ctp) for ctp in compound_term_pairs]

        # create multi parameter functions
        # NOTE: the order matters, on ties the earlier function is kept
        mp_functions = [
            # create f1 function a*b
            MultiParameterFunction(mult),
            # create f4 function a+b
            MultiParameterFunction(*add)
        ]

        if self.allow_combinations_of_sums_and_products:
            # add Hypotheses for 2 parameter models
            if len(compound_term_pairs) == 2:
                mp_functions += [
                    # create f2 function a*b+a
                    MultiParameterFunction(add[0], mult),
                    # create f3 function a*b+b
                    MultiParameterFunction(add[1], mult)
                ]
            # add Hypotheses for 3 parameter models
            elif len(compound_term_pairs) == 3:
                # create multiplicative multi parameter terms
                # x*y
                mult_x_y = MultiParameterTerm(compound_term_pairs[0], compound_term_pairs[1])
                # y*z
                mult_y_z = MultiParameterTerm(compound_term_pairs[1], compound_term_pairs[2])
                # x*z
                mult_x_z = MultiParameterTerm(compound_term_pairs[0], compound_term_pairs[2])

                # create multi parameter functions
                mp_functions += [
                    # x*y*z+x
                    MultiParameterFunction(mult, add[0]),
                    # x*y*z+y
                    MultiParameterFunction(mult, add[1]),
                    # x*y*z+z
                    MultiParameterFunction(mult, add[2]),

                    # x*y*z+x*y
                    MultiParameterFunction(mult, mult_x_y),
                    # x*y*z+y*z
                    MultiParameterFunction(mult, mult_y_z),
                    # x*y*z+x*z
                    MultiParameterFunction(mult, mult_x_z),

                    # x*y*z+x*y+z
                    MultiParameterFunction(mult, mult_x_y, add[2]),
                    # x*y*z+y*z+x
                    MultiParameterFunction(mult, mult_y_z, add[0]),
                    # x*y*z+x*z+y
                    MultiParameterFunction(mult, mult_x_z, add[1]),

                    # x*y*z+x+y
                    MultiParameterFunction(mult, add[0], add[1]),
                    # x*y*z+x+z
                    MultiParameterFunction(mult, add[0], add[2]),
                    # x*y*z+y+z
                    MultiParameterFunction(mult, add[1], add[2]),

                    # x*y+z
                    MultiParameterFunction(mult_x_y, add[2]),
                    # x*y+z+y
                    MultiParameterFunction(mult_x_y, add[2], add[1]),
                    # x*y+z+x
                    MultiParameterFunction(mult_x_y, add[2], add[0]),

                    # x*z+y
                    MultiParameterFunction(mult_x_z, add[1]),
                    # x*z+y+x
                    MultiParameterFunction(mult_x_z, add[1], add[0]),
                    # x*z+y+z
                    MultiParameterFunction(mult_x_z, add[1], add[2]),

                    # y*z+x
                    MultiParameterFunction(mult_y_z, add[0]),
                    # y*z+x+y
                    MultiParameterFunction(mult_y_z, add[0], add[1]),
                    # y*z+x+z
                    MultiParameterFunction(mult_y_z, add[0], add[2])
                ]

        # create the hypotheses from the functions
        hypotheses = [MultiParameterHypothesis(f, self.use_median)
                      for f in mp_functions]

        # select one function as the best hypothesis for the start
        # NOTE: the start hypothesis is not checked for the minimum term contribution
        best_hypothesis = copy.deepcopy(hypotheses[0])
        best_hypothesis.compute_coefficients(measurements)
        best_hypothesis.compute_cost(measurements)
        best_hypothesis.compute_adjusted_rsquared(constant_cost, measurements)

        logging.info(f"hypothesis 0: {best_hypothesis.function} --- smape: {best_hypothesis.SMAPE} "
                     f"--- ar2: {best_hypothesis.AR2} --- rss: {best_hypothesis.RSS} "
                     f"--- rrss: {best_hypothesis.rRSS} --- re: {best_hypothesis.RE}")

        # find the best hypothesis
        for i, hypothesis in enumerate(hypotheses):
            hypothesis.compute_coefficients(measurements)
            hypothesis.compute_cost(measurements)
            hypothesis.compute_adjusted_rsquared(constant_cost, measurements)

            logging.info(f"hypothesis {i}: {hypothesis.function} --- smape: {hypothesis.SMAPE} "
                         f"--- ar2: {hypothesis.AR2} --- rss: {hypothesis.RSS} "
                         f"--- rrss: {hypothesis.rRSS} --- re: {hypothesis.RE}")

            # ignore this hypothesis, if one of the terms contributes less than epsilon to the function
            term_contribution_too_small = any(
                term.coefficient == 0 or hypothesis.calc_term_contribution(term, measurements) < self.epsilon
                for term in hypothesis.function.compound_terms)
            if term_contribution_too_small:
                continue

            if self.compare_with_RSS:
                is_better = hypothesis.RSS < best_hypothesis.RSS
            else:
                is_better = hypothesis.SMAPE < best_hypothesis.SMAPE
            if is_better:
                best_hypothesis = copy.deepcopy(hypothesis)

        # add the best found hypothesis to the model list
        model = Model(best_hypothesis)

        logging.info(f"best hypothesis: {best_hypothesis.function} --- smape: {best_hypothesis.SMAPE} "
                     f"--- ar2: {best_hypothesis.AR2} --- rss: {best_hypothesis.RSS} "
                     f"--- rrss: {best_hypothesis.rRSS} --- re: {best_hypothesis.RE}")

        return model

示例#4

显示文件

文件： abstract_base.py 项目： Shadowjockey/extrap

class AbstractSingleParameterModeler(AbstractModeler, ABC):
    """
    Base class for single-parameter modelers.

    Provides the creation of the constant model, the comparison of hypotheses
    and the search for the best hypothesis among given candidates.
    """

    CLEAN_CONSTANT_EPSILON = 1e-3  # minimum allowed value for a constant coefficient before it is set to 0

    allow_log_terms = modeler_options.add(True, bool, 'Allows models with logarithmic terms')
    use_crossvalidation = modeler_options.add(True, bool, 'Enables cross-validation', name='Cross-validation')
    compare_with_RSS = modeler_options.add(False, bool,
                                           'If enabled the models are compared using their residual sum of squares '
                                           '(RSS) instead of their symmetric mean absolute percentage error (SMAPE)')

    def __init__(self, use_median: bool):
        """
        Initialize the modeler.

        :param use_median: passed on to the base class; selects whether the
            median instead of the mean of the measurements is used.
        """
        super().__init__(use_median)
        self.epsilon = 0.0005  # value for the minimum term contribution

    def compare_hypotheses(self, old: Hypothesis, new: SingleParameterHypothesis, measurements: Sequence[Measurement]):
        """
        Compares the best with the new hypothesis and decides which one is a better fit for the data.
        If the new hypothesis is better than the best one it becomes the best hypothesis.
        The choice is made based on the RSS or SMAPE.

        :param old: currently best hypothesis, or MAX_HYPOTHESIS if there is none yet.
        :param new: candidate hypothesis.
        :param measurements: measurements used to calculate the term contributions.
        :return: True if the new hypothesis should replace the old one.
        """
        if old == MAX_HYPOTHESIS:
            return True

        # errstate restores the numpy error handling on every exit path,
        # including the early return below
        with numpy.errstate(divide='ignore', invalid='ignore'):
            # for all compound terms check if they are smaller than minimum allowed contribution
            for term in new.function.compound_terms:
                # ignore this hypothesis, since one of the terms contributes less than epsilon to the function
                if term.coefficient == 0 or new.calc_term_contribution(term, measurements) < self.epsilon:
                    return False

        # print smapes in debug mode
        logging.debug("next hypothesis SMAPE: " + str(new.SMAPE) + ' RSS:' + str(new.RSS))
        logging.debug("best hypothesis SMAPE: " + str(old.SMAPE) + ' RSS:' + str(old.RSS))
        if self.compare_with_RSS:
            return new.RSS < old.RSS
        return new.SMAPE < old.SMAPE

    def create_constant_model(self, measurements: Sequence[Measurement]) -> Tuple[ConstantHypothesis, float]:
        """
        Creates a constant model that fits the data using a ConstantFunction.

        :param measurements: measurements to fit.
        :return: the constant hypothesis and its RSS as cost.
        """
        # compute the constant coefficient as the mean of all measurement values
        mean_model = sum(m.value(self.use_median) / len(measurements) for m in measurements)

        # create a constant function
        constant_function = ConstantFunction(mean_model)
        constant_hypothesis = ConstantHypothesis(constant_function, self.use_median)

        # compute cost of the constant model
        constant_hypothesis.compute_cost(measurements)
        constant_cost = constant_hypothesis.RSS

        return constant_hypothesis, constant_cost

    def find_best_hypothesis(self, candidate_hypotheses: Iterable[SH], constant_cost: float,
                             measurements: Sequence[Measurement], current_best: H = MAX_HYPOTHESIS) -> Union[SH, H]:
        """
        Searches for the best single parameter hypothesis and returns it.

        :param candidate_hypotheses: hypotheses to evaluate; their coefficients and costs are computed in place.
        :param constant_cost: cost of the constant model, used for the adjusted R^2.
        :param measurements: measurements to fit.
        :param current_best: hypothesis the candidates have to beat.
        :return: the best candidate, or current_best if no candidate was better.
        """

        # the hypothesis passed by the caller is the best hypothesis so far
        best_hypothesis = current_best

        # search for the best hypothesis over all functions that can be build with the basic building blocks
        for i, next_hypothesis in enumerate(candidate_hypotheses):

            if self.use_crossvalidation:
                # use leave one out crossvalidation
                # cycle through points and leave one out per iteration
                for element_id, validation_measurement in enumerate(measurements):
                    # copy measurements to create the training set and remove the validation element
                    training_measurements = list(measurements)
                    training_measurements.pop(element_id)

                    # compute the model coefficients based on the training data
                    next_hypothesis.compute_coefficients(training_measurements)

                    # check if the constant coefficient should actually be 0
                    # NOTE(review): this branch passes self.epsilon, the branch without cross-validation
                    # passes CLEAN_CONSTANT_EPSILON -- confirm that the difference is intended
                    next_hypothesis.clean_constant_coefficient(self.epsilon, training_measurements)

                    # compute the cost of the single-parameter model for the validation data
                    next_hypothesis.compute_cost(training_measurements, validation_measurement)

                # compute the model coefficients using all data
                next_hypothesis.compute_coefficients(measurements)
                logging.debug(f"single-parameter model {i}: " + next_hypothesis.function.to_string(Parameter('p')))
            else:
                # compute the model coefficients using all data
                next_hypothesis.compute_coefficients(measurements)

                # check if the constant coefficient should actually be 0
                next_hypothesis.clean_constant_coefficient(
                    self.CLEAN_CONSTANT_EPSILON, measurements)

                # compute the cost of the single-parameter model using all data
                next_hypothesis.compute_cost_all_points(measurements)

            # compute the AR2 for the hypothesis
            next_hypothesis.compute_adjusted_rsquared(constant_cost, measurements)

            # check if hypothesis is valid
            if not next_hypothesis.is_valid():
                logging.info(
                    "Numeric imprecision found. Model is invalid and will be ignored.")

            # compare the new hypothesis with the best hypothesis
            elif self.compare_hypotheses(best_hypothesis, next_hypothesis, measurements):
                best_hypothesis = next_hypothesis

        return best_hypothesis

    @staticmethod
    def are_measurements_log_capable(measurements, check_negative_exponents=False):
        """ Checks if logarithmic models can be used to describe the measurements.
            If the parameter values are smaller than 1 log terms are not allowed.
            If check_negative_exponents is set, values equal to 1 are not allowed either."""

        values = (value for measurement in measurements for value in measurement.coordinate)
        if check_negative_exponents:
            return not any(value <= 1.0 for value in values)
        return not any(value < 1.0 for value in values)