class GPUDirectMultiParameterModeler(AbstractMultiParameterModeler, SingularModeler):
    """
    Multi-parameter modeler that hands the measurements to the native ``libcuMppm`` library.

    The measurements are flattened into one float buffer (per measurement: all coordinate values followed
    by the measured value) and passed to the library's ``find_hypothesis`` function.
    """

    NAME = 'GPU-Direct-Multi-Parameter'

    single_parameter_modeler: 'SingleParameterModeler'
    use_crossvalidation = modeler_options.add(True, bool, 'Enables cross-validation', name='Cross-validation')
    allow_combinations_of_sums_and_products = modeler_options.add(
        True, bool,
        description="Allows models that consist of "
                    "combinations of sums and products.")
    compare_with_RSS = modeler_options.add(
        False, bool,
        'If enabled the models are compared using their residual sum of squares '
        '(RSS) instead of their symmetric mean absolute percentage error (SMAPE)'
    )

    def __init__(self):
        """
        Initialize GPUDirectMultiParameterModeler object.

        Modeling is based on the mean values (``use_median=False``) and the default single-parameter modeler.
        """
        super().__init__(use_median=False, single_parameter_modeler=single_parameter.Default())

        # value for the minimum number of measurement points required for modeling
        self.min_measurement_points = 5
        self.epsilon = 0.0005  # value for the minimum term contribution

    def create_model(self, measurements: Sequence[Measurement]):
        """
        Pass the measurements to ``find_hypothesis`` of ``./lib/libcuMppm.so`` and print the result.

        :param measurements: Non-empty sequence of measurements; the first one determines the number of
                             coordinate dimensions.
        """
        # one column per coordinate dimension plus one column for the measured value
        w = measurements[0].coordinate.dimensions + 1
        h = len(measurements)

        # flatten the measurements: [coordinate values..., value] for every measurement
        c_measurements = []
        for measurement in measurements:
            row = list(measurement.coordinate)
            row.append(measurement.mean if not self.use_median else measurement.median)
            c_measurements += row

        cu_mppm = CDLL('./lib/libcuMppm.so')
        float_arr = c_float * (h * w)
        # the ctypes array has to be filled with the flattened float values, not with the Measurement objects
        elements = float_arr(*c_measurements)
        elements_ptr = cast(elements, POINTER(c_float))
        cu_mppm.find_hypothesis.restype = CPUHypothesis
        cpu_measurements = CPUMatrix(w, h, elements_ptr)
        hypothesis = cu_mppm.find_hypothesis(byref(cpu_measurements))
        # NOTE(review): the native hypothesis is only printed; this method returns None and does not
        # produce a Model -- confirm whether a conversion step is still missing.
        print(hypothesis)
class SingleParameterModeler(AbstractSingleParameterModeler, SingularModeler):
    """
    Modeler for functions of a single parameter.

    A warning is issued when fewer than 5 measurement points are supplied.
    The resulting model is either a constant function or a PMNF-based function.
    """

    NAME = 'Basic'
    DESCRIPTION = "Modeler for single-parameter models; traverses the search-space of all defined hypotheses."

    allow_log_terms = modeler_options.add(
        True, bool, 'Allows models with logarithmic terms',
        on_change=lambda self, v: self._exponents_changed())
    poly_exponents = modeler_options.add(
        '', str, 'Set of polynomial exponents. Use comma separated list.',
        name='Polynomial',
        on_change=lambda self, v: self._exponents_changed())
    log_exponents = modeler_options.add(
        '', str, 'Set of logarithmic exponents. Use comma separated list.',
        name='Logarithmic',
        on_change=lambda self, v: self._exponents_changed())
    retain_default_exponents = modeler_options.add(
        False, bool, 'If set the default exponents are added to the given ones.',
        name='Retain default',
        on_change=lambda self, v: self._exponents_changed())
    force_combination_exponents = modeler_options.add(
        False, bool, 'If set the exact combination of exponents is forced.',
        name='Force combination',
        on_change=lambda self, v: self._exponents_changed())
    allow_negative_exponents = modeler_options.add(
        False, bool, 'If set adds neagtive exponents for strong scaling.',
        name='Negative exponents',
        on_change=lambda self, v: self._exponents_changed())
    modeler_options.group('Exponents', poly_exponents, log_exponents, retain_default_exponents,
                          force_combination_exponents, allow_negative_exponents)

    def __init__(self):
        """
        Set up the modeler with mean-based modeling and the default building blocks.
        """
        super().__init__(use_median=False)

        # minimum number of measurement points expected for modeling
        self.min_measurement_points = 5

        # building blocks (compound terms) from which the hypotheses are created
        self.hypotheses_building_blocks: List[CompoundTerm] = self.create_default_building_blocks(
            self.allow_log_terms, self.allow_negative_exponents)

    def _exponents_changed(self):
        """
        Rebuild the building blocks after one of the exponent related options changed.
        """

        def to_number(token):
            # tokens containing a dot are read as float, everything else as int
            return float(token) if '.' in token else int(token)

        def parse_expos(text):
            # tokens that are not numbers (including empty ones) are skipped
            numbers = []
            for token in text.split(','):
                try:
                    numbers.append(to_number(token))
                except ValueError:
                    continue
            return numbers

        poly = parse_expos(self.poly_exponents)
        log = parse_expos(self.log_exponents)

        if not poly and not log:
            # no custom exponents given: fall back to the defaults
            self.hypotheses_building_blocks = self.create_default_building_blocks(
                self.allow_log_terms, self.allow_negative_exponents)
            return

        self.hypotheses_building_blocks = self.generate_building_blocks(
            poly, log, self.force_combination_exponents)
        if self.retain_default_exponents:
            defaults = self.create_default_building_blocks(
                self.allow_log_terms, self.allow_negative_exponents)
            self.hypotheses_building_blocks.extend(defaults)

    def get_matching_hypotheses(self, measurements: Sequence[Measurement]):
        """
        Return the building blocks usable for the measurements.

        Building blocks with logarithmic terms are left out when the measurements are not log capable.
        """
        log_capable = self.are_measurements_log_capable(measurements, self.allow_negative_exponents)
        if log_capable:
            return self.hypotheses_building_blocks

        def has_log_term(compound_term):
            for simple_term in compound_term.simple_terms:
                if simple_term.term_type == "logarithm":
                    return True
            return False

        return [block for block in self.hypotheses_building_blocks if not has_log_term(block)]

    @staticmethod
    def create_default_building_blocks(allow_log_terms, allow_negative_exponents=False):
        """
        Create the default compound terms used while searching for the best single-parameter hypothesis.

        Each exponent tuple is forwarded unchanged to ``CompoundTerm.create``.
        """
        if allow_log_terms:
            exponents = [
                (0, 1, 1), (0, 1, 2),
                (1, 4, 0), (1, 3, 0), (1, 4, 1), (1, 3, 1), (1, 4, 2), (1, 3, 2),
                (1, 2, 0), (1, 2, 1), (1, 2, 2),
                (2, 3, 0), (3, 4, 0), (2, 3, 1), (3, 4, 1), (4, 5, 0), (2, 3, 2), (3, 4, 2),
                (1, 1, 0), (1, 1, 1), (1, 1, 2),
                (5, 4, 0), (5, 4, 1), (4, 3, 0), (4, 3, 1),
                (3, 2, 0), (3, 2, 1), (3, 2, 2),
                (5, 3, 0), (7, 4, 0),
                (2, 1, 0), (2, 1, 1), (2, 1, 2),
                (9, 4, 0), (7, 3, 0),
                (5, 2, 0), (5, 2, 1), (5, 2, 2),
                (8, 3, 0), (11, 4, 0),
                (3, 1, 0), (3, 1, 1),
            ]
            # original note at this position: "These were used for relearn"
            negative_exponents = [
                (-0, 1, -1), (-0, 1, -2),
                (-1, 4, -1), (-1, 3, -1), (-1, 4, -2), (-1, 3, -2),
                (-1, 2, -1), (-1, 2, -2),
                (-2, 3, -1), (-3, 4, -1), (-2, 3, -2), (-3, 4, -2),
                (-1, 1, -1), (-1, 1, -2),
                (-5, 4, -1), (-4, 3, -1),
                (-3, 2, -1), (-3, 2, -2),
                (-2, 1, -1), (-2, 1, -2),
                (-5, 2, -1), (-5, 2, -2),
                (-3, 1, -1),
            ]
        else:
            exponents = [
                (1, 4, 0), (1, 3, 0), (1, 2, 0), (2, 3, 0), (3, 4, 0), (4, 5, 0),
                (1, 1, 0),
                (5, 4, 0), (4, 3, 0), (3, 2, 0), (5, 3, 0), (7, 4, 0),
                (2, 1, 0),
                (9, 4, 0), (7, 3, 0), (5, 2, 0), (8, 3, 0), (11, 4, 0),
                (3, 1, 0),
            ]
            # original note at this position: "These were used for relearn"
            negative_exponents = [
                (-1, 4, 0), (-1, 3, 0), (-1, 2, 0), (-2, 3, 0), (-3, 4, 0), (-4, 5, 0),
                (-1, 1, 0),
                (-5, 4, 0), (-4, 3, 0), (-3, 2, 0), (-5, 3, 0), (-7, 4, 0),
                (-2, 1, 0),
                (-9, 4, 0), (-7, 3, 0), (-5, 2, 0), (-8, 3, 0), (-11, 4, 0),
                (-3, 1, 0),
            ]

        if allow_negative_exponents:
            exponents.extend(negative_exponents)

        hypotheses_building_blocks = [CompoundTerm.create(*exponent) for exponent in exponents]

        # list all compound terms when debug logging is enabled
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            parameter = Parameter('p')
            for i, compound_term in enumerate(hypotheses_building_blocks):
                logging.debug(f"Compound term {i}: {compound_term.to_string(parameter)}")

        return hypotheses_building_blocks

    @staticmethod
    def generate_building_blocks(poly_exponents, log_exponents, force_combination=False):
        """
        Create compound terms from the given polynomial and logarithmic exponents.

        With ``force_combination`` only the pairs (polynomial, logarithmic) are used; otherwise the purely
        polynomial and purely logarithmic terms are generated first, followed by all pairs.
        """
        if not force_combination:
            poly_only = itertools.product(poly_exponents, [0])
            log_only = itertools.product([0], log_exponents)
            combined = itertools.product(poly_exponents, log_exponents)
            pairs = itertools.chain(poly_only, log_only, combined)
        else:
            pairs = itertools.product(poly_exponents, log_exponents)

        return [CompoundTerm.create(poly, log) for poly, log in pairs]

    def build_hypotheses(self, measurements):
        """
        Yield one single-parameter hypothesis per building block that matches the measurements.
        """
        for compound_term in self.get_matching_hypotheses(measurements):
            # every hypothesis works on its own copy of the compound term
            function = SingleParameterFunction(copy.copy(compound_term))
            yield SingleParameterHypothesis(function, self.use_median)

    def create_model(self, measurements: Sequence[Measurement]):
        """
        Create a model for the given measurements.

        The constant model is returned when its cost is 0; otherwise the best hypothesis found among the
        building blocks (or the constant one) is returned.
        """
        # too few measurements only trigger a warning, modeling continues nevertheless
        if len(measurements) < self.min_measurement_points:
            warnings.warn(
                "Number of measurements for a parameter needs to be at least 5 in order to create a performance model."
            )

        # the constant model is the baseline
        constant_hypothesis, constant_cost = self.create_constant_model(measurements)
        logging.debug("Constant model: " + constant_hypothesis.function.to_string())
        logging.debug("Constant model cost: " + str(constant_cost))

        # a perfect constant fit needs no further search
        if constant_cost == 0:
            logging.debug("Using constant model.")
            return Model(constant_hypothesis)

        # search the PMNF based hypotheses for a better fit
        logging.debug("Searching for a single-parameter model.")
        candidates = self.build_hypotheses(measurements)
        best_hypothesis = self.find_best_hypothesis(
            candidates, constant_cost, measurements, constant_hypothesis)
        return Model(best_hypothesis)
class MultiParameterModeler(AbstractMultiParameterModeler, SingularModeler):
    """
    This class represents the modeler for multi parameter functions.
    A warning is issued when fewer than 5 measurement points are supplied.
    The result is either a constant function or one based on the PMNF.
    """

    NAME = 'Multi-Parameter'
    DESCRIPTION = "Modeler for multi-parameter models; supports full and sparse modeling."

    single_parameter_point_selection = modeler_options.add('auto', str, range=['auto', 'smallest', 'all'],
                                                           description="Sets the point selection method for creating "
                                                                       "the single-parameter models.")
    allow_combinations_of_sums_and_products = modeler_options.add(True, bool,
                                                                  description="Allows models that consist of "
                                                                              "combinations of sums and products.")
    compare_with_RSS = modeler_options.add(False, bool,
                                           'If enabled the models are compared using their residual sum of squares '
                                           '(RSS) instead of their symmetric mean absolute percentage error (SMAPE)')

    def __init__(self):
        """
        Initialize MultiParameterModeler object.

        Modeling is based on the mean values (``use_median=False``) and the default single-parameter modeler.
        """
        super().__init__(use_median=False, single_parameter_modeler=single_parameter.Default())
        # value for the minimum number of measurement points required for modeling
        self.min_measurement_points = 5
        self.epsilon = 0.0005  # value for the minimum term contribution

    def find_best_measurement_points(self, measurements: Sequence[Measurement]):
        """
        Determines the best measurement points for creating the single-parameter models.

        :param measurements: Non-empty sequence of measurements with multi-dimensional coordinates.
        :return: One list of single-parameter measurements per dimension. Measurements sharing the same
                 coordinate in a dimension are combined into one measurement.
        """

        def make_measurement(c, ms: Sequence[Measurement]):
            # a single measurement is only copied and gets the one-dimensional coordinate
            if len(ms) == 1:
                measurement = copy.copy(ms[0])
                measurement.coordinate = Coordinate(c)
                return measurement

            measurement = Measurement(Coordinate(c), ms[0].callpath, ms[0].metric, None)

            if self.use_median:
                value = np.mean([m.median for m in ms])
            else:
                value = np.mean([m.mean for m in ms])

            # mean and median of the combined measurement are both set to the averaged value
            measurement.mean = value
            measurement.median = value
            if measurement.mean == 0:
                measurement.maximum = np.mean([m.maximum for m in ms])
                measurement.minimum = np.mean([m.minimum for m in ms])
                measurement.std = np.mean([m.std for m in ms])
            else:
                # average the statistics relative to each mean and scale them to the combined mean
                measurement.maximum = np.nanmean([m.maximum / m.mean for m in ms]) * measurement.mean
                measurement.minimum = np.nanmean([m.minimum / m.mean for m in ms]) * measurement.mean
                measurement.std = np.nanmean([m.std / m.mean for m in ms]) * measurement.mean

            return measurement

        dimensions = measurements[0].coordinate.dimensions
        dimension_groups = [{} for _ in range(dimensions)]
        # group all measurements for each dimension, by their coordinates in the other dimensions
        for m in measurements:
            for p in range(dimensions):
                dimension_groups[p].setdefault(m.coordinate.as_partial_tuple(p), []).append(m)

        use_all = True
        result_groups = []
        for p, grp in enumerate(dimension_groups):
            # select the longest groups, which cover the biggest range in each direction
            grp_values = iter(grp.values())
            first_ms = next(grp_values)
            current_max = len(first_ms)
            candidates = [first_ms]
            for ms in grp_values:
                len_ms = len(ms)
                if len_ms > current_max:
                    current_max = len_ms
                    candidates = [ms]
                    use_all = False
                elif len_ms == current_max:
                    candidates.append(ms)
                else:
                    use_all = False

            # regroup the longest groups by their coordinate in the current dimension
            groups = {}
            for candidate in candidates:
                for m in candidate:
                    groups.setdefault(m.coordinate[p], []).append(m)

            # remove all measurements from the group which cover not the same range as the initial group
            cms = iter(groups.values())
            first_list = next(cms)
            common_coords = {m.coordinate.as_partial_tuple(p) for m in first_list}
            for g in cms:
                # filter in place, the lists are referenced by `groups`
                g[:] = [m for m in g if m.coordinate.as_partial_tuple(p) in common_coords]

            result_groups.append(groups)

        if self.single_parameter_point_selection == 'all' and not use_all:
            warnings.warn(
                "Could not use all measurement points. At least 25 measurements are needed; one for each "
                "combination of parameters.")

        # errstate restores the previous numpy error handling even if combining a measurement raises
        with np.errstate(invalid='ignore'):
            combined_measurements = [[make_measurement(c, ms) for c, ms in grp.items() if ms]
                                     for grp in result_groups]

        return combined_measurements

    @staticmethod
    def find_first_measurement_points(measurements: Sequence[Measurement]):
        """
        This method returns the smallest possible measurements that should be used for creating
        the single-parameter models.

        :param measurements: Non-empty sequence of measurements with multi-dimensional coordinates.
        :return: One list of single-parameter measurement copies per dimension.
        """

        def single_parameter_copy(m, p):
            # shallow copy of the measurement that keeps only the coordinate value of dimension p
            m_sp = copy.copy(m)
            m_sp.coordinate = Coordinate(m.coordinate[p])
            return m_sp

        dimensions = measurements[0].coordinate.dimensions
        min_coordinate = [
            Coordinate(float('Inf') for _ in range(dimensions))
            for _ in range(dimensions)
        ]
        candidate_list = [[] for _ in range(dimensions)]
        for m in measurements:
            for p in range(dimensions):
                if m.coordinate.is_mostly_equal(min_coordinate[p], p):
                    candidate_list[p].append(single_parameter_copy(m, p))
                elif m.coordinate.is_mostly_lower(min_coordinate[p], p):
                    # a lower base point was found: restart the candidates for this dimension
                    candidate_list[p].clear()
                    candidate_list[p].append(single_parameter_copy(m, p))
                    min_coordinate[p] = m.coordinate

        return candidate_list

    @staticmethod
    def _log_hypothesis_metrics(label: str, hypothesis):
        """Logs the function and the quality metrics of a hypothesis, prefixed with the given label."""
        logging.info(f"{label}: {hypothesis.function} --- smape: {hypothesis.SMAPE} "
                     f"--- ar2: {hypothesis.AR2} --- rss: {hypothesis.RSS} "
                     f"--- rrss: {hypothesis.rRSS} --- re: {hypothesis.RE}")

    def _build_candidate_functions(self, compound_term_pairs):
        """Creates the candidate multi-parameter functions for two or more (parameter, compound term) pairs."""
        # create multiplicative multi parameter term
        mult = MultiParameterTerm(*compound_term_pairs)

        # create additive multi parameter terms
        add = [MultiParameterTerm(ctp) for ctp in compound_term_pairs]

        # create multi parameter functions
        mp_functions = [
            # create f1 function a*b
            MultiParameterFunction(mult),
            # create f4 function a+b
            MultiParameterFunction(*add)
        ]

        if not self.allow_combinations_of_sums_and_products:
            return mp_functions

        # add Hypotheses for 2 parameter models
        if len(compound_term_pairs) == 2:
            mp_functions += [
                # create f2 function a*b+a
                MultiParameterFunction(add[0], mult),
                # create f3 function a*b+b
                MultiParameterFunction(add[1], mult)
            ]
        # add Hypotheses for 3 parameter models
        elif len(compound_term_pairs) == 3:
            # create multiplicative multi parameter terms
            # x*y
            mult_x_y = MultiParameterTerm(compound_term_pairs[0], compound_term_pairs[1])
            # y*z
            mult_y_z = MultiParameterTerm(compound_term_pairs[1], compound_term_pairs[2])
            # x*z
            mult_x_z = MultiParameterTerm(compound_term_pairs[0], compound_term_pairs[2])

            # create multi parameter functions
            mp_functions += [
                # x*y*z+x
                MultiParameterFunction(mult, add[0]),
                # x*y*z+y
                MultiParameterFunction(mult, add[1]),
                # x*y*z+z
                MultiParameterFunction(mult, add[2]),

                # x*y*z+x*y
                MultiParameterFunction(mult, mult_x_y),
                # x*y*z+y*z
                MultiParameterFunction(mult, mult_y_z),
                # x*y*z+x*z
                MultiParameterFunction(mult, mult_x_z),

                # x*y*z+x*y+z
                MultiParameterFunction(mult, mult_x_y, add[2]),
                # x*y*z+y*z+x
                MultiParameterFunction(mult, mult_y_z, add[0]),
                # x*y*z+x*z+y
                MultiParameterFunction(mult, mult_x_z, add[1]),

                # x*y*z+x+y
                MultiParameterFunction(mult, add[0], add[1]),
                # x*y*z+x+z
                MultiParameterFunction(mult, add[0], add[2]),
                # x*y*z+y+z
                MultiParameterFunction(mult, add[1], add[2]),

                # x*y+z
                MultiParameterFunction(mult_x_y, add[2]),
                # x*y+z+y
                MultiParameterFunction(mult_x_y, add[2], add[1]),
                # x*y+z+x
                MultiParameterFunction(mult_x_y, add[2], add[0]),

                # x*z+y
                MultiParameterFunction(mult_x_z, add[1]),
                # x*z+y+x
                MultiParameterFunction(mult_x_z, add[1], add[0]),
                # x*z+y+z
                MultiParameterFunction(mult_x_z, add[1], add[2]),

                # y*z+x
                MultiParameterFunction(mult_y_z, add[0]),
                # y*z+x+y
                MultiParameterFunction(mult_y_z, add[0], add[1]),
                # y*z+x+z
                MultiParameterFunction(mult_y_z, add[0], add[2])
            ]

        return mp_functions

    def create_model(self, measurements: Sequence[Measurement]):
        """
        Create a multi-parameter model using the given measurements.

        First the single-parameter models are created from the selected measurement points. Their first
        compound terms are then combined into multi-parameter hypotheses, of which the best one
        (by RSS or SMAPE, depending on ``compare_with_RSS``) is returned.

        :param measurements: Non-empty sequence of measurements.
        :return: The model wrapping the best hypothesis found.
        """
        if self.single_parameter_point_selection in ('auto', 'all'):
            measurements_sp = self.find_best_measurement_points(measurements)
        else:
            # use the first base points found for each parameter for modeling of the single parameter functions
            measurements_sp = self.find_first_measurement_points(measurements)

        # model all single parameter experiments using only the selected points from the step before
        models = self.single_parameter_modeler.model(measurements_sp)
        functions = [m.hypothesis.function for m in models]

        # check if the number of measurements satisfies the requirements of the modeler (>=5);
        # too few measurements only trigger a warning, modeling continues nevertheless
        if len(measurements) < self.min_measurement_points:
            warnings.warn("Number of measurements for each parameter needs to be at least 5"
                          " in order to create a performance model.")

        # use all available additional points for modeling the multi-parameter models
        # mean of all values and the squared deviation from it (cost of the constant model)
        measurement_count = float(len(measurements))
        mean_model = sum(m.value(self.use_median) / measurement_count for m in measurements)
        constant_cost = sum((m.value(self.use_median) - mean_model) * (m.value(self.use_median) - mean_model)
                            for m in measurements)

        # find out which parameters should be kept
        compound_term_pairs = []
        for i, function in enumerate(functions):
            terms = function.compound_terms
            if len(terms) > 0:
                compound_term = terms[0]
                # the coefficient is reset, it gets fitted again for the multi-parameter function
                compound_term.coefficient = 1
                compound_term_pairs.append((i, compound_term))

        # see if the function is constant
        if len(compound_term_pairs) == 0:
            constant_function = ConstantFunction()
            constant_function.constant_coefficient = mean_model
            constant_hypothesis = ConstantHypothesis(constant_function, self.use_median)
            constant_hypothesis.compute_cost(measurements)
            return Model(constant_hypothesis)

        # in case is only one parameter, make a single parameter function
        if len(compound_term_pairs) == 1:
            _, compound_term = compound_term_pairs[0]
            multi_parameter_function = MultiParameterFunction()
            multi_parameter_term = MultiParameterTerm(compound_term_pairs[0])
            multi_parameter_term.coefficient = compound_term.coefficient
            multi_parameter_function.add_compound_term(multi_parameter_term)
            multi_parameter_hypothesis = MultiParameterHypothesis(multi_parameter_function, self.use_median)
            multi_parameter_hypothesis.compute_coefficients(measurements)
            multi_parameter_hypothesis.compute_cost(measurements)
            return Model(multi_parameter_hypothesis)

        # create the hypotheses from the functions
        mp_functions = self._build_candidate_functions(compound_term_pairs)
        hypotheses = [MultiParameterHypothesis(f, self.use_median) for f in mp_functions]

        # select one function as the bestHypothesis for the start
        # (it is used as initial best without checking its term contributions)
        best_hypothesis = copy.deepcopy(hypotheses[0])
        best_hypothesis.compute_coefficients(measurements)
        best_hypothesis.compute_cost(measurements)
        best_hypothesis.compute_adjusted_rsquared(constant_cost, measurements)
        self._log_hypothesis_metrics("hypothesis 0", best_hypothesis)

        # find the best hypothesis
        for i, hypothesis in enumerate(hypotheses):
            hypothesis.compute_coefficients(measurements)
            hypothesis.compute_cost(measurements)
            hypothesis.compute_adjusted_rsquared(constant_cost, measurements)
            self._log_hypothesis_metrics(f"hypothesis {i}", hypothesis)

            # ignore this hypothesis, if one of the terms contributes less than epsilon to the function
            if any(term.coefficient == 0 or hypothesis.calc_term_contribution(term, measurements) < self.epsilon
                   for term in hypothesis.function.compound_terms):
                continue

            if self.compare_with_RSS:
                if hypothesis.RSS < best_hypothesis.RSS:
                    best_hypothesis = copy.deepcopy(hypothesis)
            elif hypothesis.SMAPE < best_hypothesis.SMAPE:
                best_hypothesis = copy.deepcopy(hypothesis)

        # add the best found hypothesis to the model list
        model = Model(best_hypothesis)
        self._log_hypothesis_metrics("best hypothesis", best_hypothesis)

        return model
class AbstractSingleParameterModeler(AbstractModeler, ABC):
    """
    Base class for single-parameter modelers.

    Provides the creation of the constant model, the comparison of hypotheses and the search for the best
    hypothesis among a set of candidates.
    """

    CLEAN_CONSTANT_EPSILON = 1e-3  # minimum allowed value for a constant coefficient before it is set to 0

    allow_log_terms = modeler_options.add(True, bool, 'Allows models with logarithmic terms')
    use_crossvalidation = modeler_options.add(True, bool, 'Enables cross-validation', name='Cross-validation')
    compare_with_RSS = modeler_options.add(False, bool,
                                           'If enabled the models are compared using their residual sum of squares '
                                           '(RSS) instead of their symmetric mean absolute percentage error (SMAPE)')

    def __init__(self, use_median: bool):
        """
        Initialize the modeler.

        :param use_median: Passed on to the base class; selects whether median or mean values are modeled.
        """
        super().__init__(use_median)
        self.epsilon = 0.0005  # value for the minimum term contribution

    def compare_hypotheses(self, old: Hypothesis, new: SingleParameterHypothesis, measurements: Sequence[Measurement]):
        """
        Compares the best with the new hypothesis and decides which one is a better fit for the data.
        If the new hypothesis is better than the best one it becomes the best hypothesis.
        The choice is made based on the RSS or SMAPE.

        :param old: The currently best hypothesis (or ``MAX_HYPOTHESIS`` if there is none yet).
        :param new: The hypothesis that is compared against the best one.
        :param measurements: The measurements used to calculate the term contributions.
        :return: ``True`` if the new hypothesis should replace the old one, otherwise ``False``.
        """
        if old == MAX_HYPOTHESIS:
            return True

        # get the compound terms of the new hypothesis
        compound_terms = new.function.compound_terms

        # errstate restores the previous numpy error handling on every exit path, including the early return
        with numpy.errstate(divide='ignore', invalid='ignore'):
            # for all compound terms check if they are smaller than minimum allowed contribution
            for term in compound_terms:
                # ignore this hypothesis, since one of the terms contributes less than epsilon to the function
                if term.coefficient == 0 or new.calc_term_contribution(term, measurements) < self.epsilon:
                    return False

        # print smapes in debug mode
        logging.debug("next hypothesis SMAPE: " + str(new.SMAPE) + ' RSS:' + str(new.RSS))
        logging.debug("best hypothesis SMAPE: " + str(old.SMAPE) + ' RSS:' + str(old.RSS))
        if self.compare_with_RSS:
            return new.RSS < old.RSS
        return new.SMAPE < old.SMAPE

    def create_constant_model(self, measurements: Sequence[Measurement]) -> Tuple[ConstantHypothesis, float]:
        """
        Creates a constant model that fits the data using a ConstantFunction.

        :param measurements: The measurements to model.
        :return: The constant hypothesis and its cost (RSS).
        """
        # compute the constant coefficient (mean of all values)
        mean_model = sum(m.value(self.use_median) / len(measurements) for m in measurements)

        # create a constant function
        constant_function = ConstantFunction(mean_model)
        constant_hypothesis = ConstantHypothesis(constant_function, self.use_median)

        # compute cost of the constant model
        constant_hypothesis.compute_cost(measurements)
        constant_cost = constant_hypothesis.RSS

        return constant_hypothesis, constant_cost

    def find_best_hypothesis(self, candidate_hypotheses: Iterable[SH], constant_cost: float,
                             measurements: Sequence[Measurement], current_best: H = MAX_HYPOTHESIS) -> Union[SH, H]:
        """
        Searches for the best single parameter hypothesis and returns it.

        :param candidate_hypotheses: The hypotheses that are fitted and compared.
        :param constant_cost: Cost of the constant model, used for computing the adjusted R^2.
        :param measurements: The measurements the hypotheses are fitted to.
        :param current_best: The hypothesis the search starts with.
        :return: The best hypothesis found, or ``current_best`` if no candidate was better.
        """
        # currently the given hypothesis is the best hypothesis
        best_hypothesis = current_best

        # search for the best hypothesis over all functions that can be build with the basic building blocks
        for i, next_hypothesis in enumerate(candidate_hypotheses):

            if self.use_crossvalidation:
                # use leave one out crossvalidation
                # cycle through points and leave one out per iteration
                for element_id, validation_measurement in enumerate(measurements):
                    # training set: all measurements except the validation measurement
                    training_measurements = list(measurements)
                    training_measurements.pop(element_id)

                    # compute the model coefficients based on the training data
                    next_hypothesis.compute_coefficients(training_measurements)

                    # check if the constant coefficient should actually be 0
                    # NOTE(review): this branch uses self.epsilon while the branch without cross-validation
                    # uses CLEAN_CONSTANT_EPSILON -- confirm whether the difference is intended.
                    next_hypothesis.clean_constant_coefficient(self.epsilon, training_measurements)

                    # compute the cost of the single-parameter model for the validation data
                    next_hypothesis.compute_cost(training_measurements, validation_measurement)

                # compute the model coefficients using all data
                next_hypothesis.compute_coefficients(measurements)
                logging.debug(f"single-parameter model {i}: " + next_hypothesis.function.to_string(Parameter('p')))
            else:
                # compute the model coefficients based on all data
                next_hypothesis.compute_coefficients(measurements)

                # check if the constant coefficient should actually be 0
                next_hypothesis.clean_constant_coefficient(self.CLEAN_CONSTANT_EPSILON, measurements)

                # compute the cost of the single-parameter model using all points
                next_hypothesis.compute_cost_all_points(measurements)

            # compute the AR2 for the hypothesis
            next_hypothesis.compute_adjusted_rsquared(constant_cost, measurements)

            # check if hypothesis is valid
            if not next_hypothesis.is_valid():
                logging.info("Numeric imprecision found. Model is invalid and will be ignored.")

            # compare the new hypothesis with the best hypothesis
            elif self.compare_hypotheses(best_hypothesis, next_hypothesis, measurements):
                best_hypothesis = next_hypothesis

        return best_hypothesis

    @staticmethod
    def are_measurements_log_capable(measurements, check_negative_exponents=False):
        """
        Checks if logarithmic models can be used to describe the measurements.
        If the parameter values are smaller than 1 log terms are not allowed.
        With ``check_negative_exponents`` parameter values equal to 1 are not allowed either.

        :return: ``False`` as soon as one coordinate value violates the limit, otherwise ``True``.
        """
        values = (value for measurement in measurements for value in measurement.coordinate)
        if check_negative_exponents:
            return not any(value <= 1.0 for value in values)
        return not any(value < 1.0 for value in values)