Пример #1
0
def genDoe(nDV, doeMethod):
    """Build a design-of-experiments table for ``nDV`` design variables.

    Parameters
    ----------
    nDV : int
        Number of design variables (factors).
    doeMethod : str
        Name of the design to generate: ``'lhs'``, ``'pbdesign'`` or
        ``'bbdesign'``.

    Returns
    -------
    The table returned by the matching ``DOE`` routine.

    Raises
    ------
    ValueError
        If ``doeMethod`` is not one of the supported names.
    """
    if doeMethod == 'lhs':
        # Ten samples are requested per design variable.
        return DOE.lhs(nDV, samples=nDV * 10)
    if doeMethod == 'pbdesign':
        return DOE.pbdesign(nDV)
    if doeMethod == 'bbdesign':
        return DOE.bbdesign(nDV)

    # The previous implementation printed a message and then crashed with an
    # UnboundLocalError on ``return doeTable``; fail explicitly instead.
    raise ValueError(
        "Error, You have to check a parameter: unknown doeMethod {!r} "
        "(expected 'lhs', 'pbdesign' or 'bbdesign')".format(doeMethod))
Пример #2
0
 def genDoe(self):
     """Build the design-of-experiments table selected by ``self.doeMethod``.

     Reads ``self.nDV`` (number of design variables) and ``self.doeMethod``
     (``'lhs'``, ``'pbdesign'`` or ``'bbdesign'``).

     Returns
     -------
     The table returned by the matching ``DOE`` routine. For ``'pbdesign'``
     the first ``nDV`` entries of the last row are overwritten with zeros.

     Raises
     ------
     ValueError
         If ``self.doeMethod`` is not one of the supported names.
     """
     if self.doeMethod == 'lhs':
         # Ten samples are requested per design variable.
         return DOE.lhs(self.nDV, samples=self.nDV * 10)
     if self.doeMethod == 'pbdesign':
         doeTable = DOE.pbdesign(self.nDV)
         # Zero the last run of the design for every design variable.
         doeTable[len(doeTable) - 1, :self.nDV] = 0
         return doeTable
     if self.doeMethod == 'bbdesign':
         return DOE.bbdesign(self.nDV, center=1)

     # The previous implementation printed a message and then crashed with an
     # UnboundLocalError on ``return doeTable``; fail explicitly instead.
     raise ValueError(
         "Error, You have to check a parameter: unknown doeMethod {!r} "
         "(expected 'lhs', 'pbdesign' or 'bbdesign')".format(self.doeMethod))
Пример #3
0
    def _generate_design(self, size):
        """
        Generate a Box-Behnken DOE design.

        Parameters
        ----------
        size : int
            The number of factors for the design.

        Returns
        -------
        ndarray
            The design matrix as a size x levels array of indices.
        """
        if size < 3:
            raise RuntimeError("Total size of design variables is %d,"
                               "but must be at least 3 when using %s. " %
                               (size, self.__class__.__name__))

        doe = pyDOE2.bbdesign(size, center=self._center)

        return doe + 1  # replace [-1, 0, 1] with [0, 1, 2]
Пример #4
0
def ge_compute_pls(X, y, n_comp, pts, delta_x, xlimits, extra_points):
    """
    Gradient-enhanced PLS-coefficients.

    For every training point a local sample is generated around it (a
    Box-Behnken pattern when ``dim >= 3``, a fixed 9-point stencil when
    ``dim == 2``). The outputs at those points are estimated with a
    first-order Taylor approximation (FOTA) built from the gradients, and a
    PLS model is fitted to the local sample. The absolute x-rotations of the
    local models are averaged over all training points.

    Parameters
    ----------

    X: np.ndarray [n_obs,dim]
            - The input variables.

    y: np.ndarray [n_obs,ny]
            - The output variable

    n_comp: int
            - Number of principal components used.

    pts: dict()
            - The gradient values. The derivative with respect to input
              ``j`` (1-based) at training point ``i`` is read as
              ``pts[None][j][1][i, 0]``.

    delta_x: real
            - The step used in the FOTA, relative to the variable range.

    xlimits: np.ndarray[dim, 2]
            - The upper and lower var bounds.

    extra_points: int
            - The number of extra points per each training point.

    Returns
    -------

    Coeff_pls: np.ndarray[dim, n_comp]
            - The PLS-coefficients.

    XX: np.ndarray[extra_points*nt, dim]
            - Extra points added (when extra_points > 0)

    yy: np.ndarray[extra_points*nt, 1]
            - Extra points added (when extra_points > 0)

    Raises
    ------
    ValueError
        If ``dim < 2``. The local stencil needs at least two inputs; the
        previous implementation failed with an IndexError in that case.

    """
    nt, dim = X.shape
    if dim < 2:
        raise ValueError(
            "ge_compute_pls needs at least 2 input dimensions, got %d" % dim)

    XX = np.empty(shape=(0, dim))
    yy = np.empty(shape=(0, y.shape[1]))
    _pls = pls(n_comp)

    # Everything below depends only on the problem definition, not on the
    # training point, so it is computed once instead of once per iteration.
    span = xlimits[:, 1] - xlimits[:, 0]
    if dim >= 3:
        # Box-Behnken pattern, rolled by one row along the run axis.
        offsets = np.roll(bbdesign(int(dim), center=1), 1,
                          axis=0) * delta_x * span
        stencil = None
    else:
        # Rows: center, right, up, left, down, right-up, left-up,
        # left-down, right-down.
        stencil = np.array([
            [0.0, 0.0],
            [1.0, 0.0],
            [0.0, 1.0],
            [-1.0, 0.0],
            [0.0, -1.0],
            [1.0, 1.0],
            [-1.0, 1.0],
            [-1.0, -1.0],
            [1.0, -1.0],
        ])
        offsets = stencil * (delta_x * span)

    coeff_pls = np.zeros((nt, dim, n_comp))
    for i in range(nt):
        # Gradient of the output w.r.t. each input at training point i.
        grads = np.array([pts[None][j][1][i, 0] for j in range(1, dim + 1)])

        _X = X[i, :] + offsets
        if dim >= 3:
            # First-order Taylor estimate: y + sum_j (dx_j * dy/dx_j).
            _y = y[i, :] + np.sum(offsets * grads, axis=1).reshape(
                (offsets.shape[0], 1))
        else:
            # Same estimate for the 2-D stencil; the terms are added in the
            # order x0 then x1, matching the original unrolled expressions.
            dy = grads * delta_x * span
            _y = (y[i, 0] + stencil[:, 0] * dy[0] +
                  stencil[:, 1] * dy[1]).reshape((stencil.shape[0], 1))

        # As of sklearn 0.24.1 a zeroed _y raises an exception while sklearn 0.23 returns zeroed x_rotations
        # For now the try/except below is a workaround to restore the 0.23 behaviour
        try:
            _pls.fit(_X.copy(), _y.copy())
            coeff_pls[i, :, :] = _pls.x_rotations_
        except StopIteration:
            coeff_pls[i, :, :] = 0

        # Add additional points along the inputs with the largest
        # first-component coefficients.
        if extra_points != 0:
            max_coeff = np.argsort(np.abs(coeff_pls[i, :, 0]))[-extra_points:]
            for ii in max_coeff:
                XX = np.vstack((XX, X[i, :]))
                XX[-1, ii] += delta_x * span[ii]
                yy = np.vstack((yy, y[i]))
                yy[-1] += pts[None][1 + ii][1][i] * delta_x * span[ii]
    return np.abs(coeff_pls).mean(axis=0), XX, yy
Пример #5
0
class ExperimentDesigner:
    """Hold factor and response definitions and drive the experimental design.

    The designer works in one of two phases, 'screening' or 'optimization',
    and produces a new design sheet for the phase it is currently in.
    """

    # Maps a lower-case design-type name to a callable that takes the number
    # of factors and returns the matching pyDOE2 design. The constructor looks
    # up ``design_type.lower()`` here to validate the requested design.
    _matrix_designers = {
        'fullfactorial2levels': pyDOE2.ff2n,
        'fullfactorial3levels': lambda n: pyDOE2.fullfact([3] * n),
        'placketburman': pyDOE2.pbdesign,
        'boxbehnken': lambda n: pyDOE2.bbdesign(n, 1),
        'ccc': lambda n: pyDOE2.ccdesign(n, (0, 3), face='ccc'),
        'ccf': lambda n: pyDOE2.ccdesign(n, (0, 3), face='ccf'),
        'cci': lambda n: pyDOE2.ccdesign(n, (0, 3), face='cci'),
    }

    def __init__(self,
                 factors,
                 design_type,
                 responses,
                 skip_screening=True,
                 at_edges='distort',
                 relative_step=.25,
                 gsd_reduction='auto',
                 model_selection='brute',
                 n_folds='loo',
                 manual_formula=None,
                 shrinkage=1.0,
                 q2_limit=0.5,
                 gsd_span_ratio=0.5):
        try:
            assert at_edges in ('distort', 'shrink'),\
                'unknown action at_edges: {0}'.format(at_edges)
            assert relative_step is None or 0 < relative_step < 1,\
                'relative_step must be float between 0 and 1 not {}'.format(relative_step)
            assert model_selection in ('brute', 'greedy', 'manual'), \
                'model_selection must be "brute", "greedy", "manual".'
            assert n_folds == 'loo' or (isinstance(n_folds, int) and n_folds > 0), \
                'n_folds must be "loo" or positive integer'
            assert 0.9 <= shrinkage <= 1, 'shrinkage must be float between 0.9 and 1.0, not {}'.format(
                shrinkage)
            assert 0 <= q2_limit <= 1, 'q2_limit must be float between 0 and 1, not {}'.format(
                q2_limit)
            if model_selection == 'manual':
                assert isinstance(manual_formula, str), \
                    'If model_selection is "manual" formula must be provided.'
        except AssertionError as e:
            raise ValueError(str(e))

        self.factors = OrderedDict()
        factor_types = list()
        for factor_name, f_spec in factors.items():
            factor = factor_from_spec(f_spec)
            if isinstance(factor, CategoricalFactor) and skip_screening:
                raise DesignerError(
                    'Can\'t perform optimization with categorical '
                    'variables without prior screening.')

            self.factors[factor_name] = factor
            logging.debug('Sets factor {}: {}'.format(factor_name, factor))

            factor_types.append(f_spec.get('type', 'continuous'))

        self.skip_screening = skip_screening
        self.step_length = relative_step
        self.design_type = design_type
        self.responses = responses
        self.response_values = None
        self.gsd_reduction = gsd_reduction
        self.model_selection = model_selection
        self.n_folds = n_folds
        self.shrinkage = shrinkage
        self.q2_limit = q2_limit
        self._formula = manual_formula
        self._edge_action = at_edges
        self._allowed_phases = ['optimization', 'screening']
        self._phase = 'optimization' if self.skip_screening else 'screening'
        self._n_screening_evaluations = 0
        self._factor_types = factor_types
        self._gsd_span_ratio = gsd_span_ratio
        self._stored_transform = lambda x: x
        self._best_experiment = {
            'optimal_x': pd.Series([]),
            'optimal_y': None,
            'weighted_y': None
        }
        n = len(self.factors)
        try:
            self._matrix_designers[self.design_type.lower()]
        except KeyError:
            raise UnsupportedDesign(self.design_type)

        if len(self.responses) > 1:
            self._desirabilites = {
                name: make_desirability_function(factor)
                for name, factor in self.responses.items()
            }
        else:
            self._desirabilites = None

    def new_design(self):
        """

        :return: Experimental design-sheet.
        :rtype: pandas.DataFrame
        """
        if self._phase == 'screening':
            return self._new_screening_design(reduction=self.gsd_reduction)
        else:
            return self._new_optimization_design()

    def write_factor_csv(self, out_file):
        factors = list()
        idx = pd.Index(['fixed_value', 'current_low', 'current_high'])

        for name, factor in self.factors.items():
            current_min = None
            current_high = None
            fixed_value = None

            if issubclass(type(factor), NumericFactor):
                current_min = factor.current_low
                current_high = factor.current_high
            elif isinstance(factor, CategoricalFactor):
                fixed_value = factor.fixed_value
            else:
                raise NotImplementedError

            data = [fixed_value, current_min, current_high]
            factors.append(pd.Series(data, index=idx, name=name))

        factors_df = pd.DataFrame(factors)
        logging.info('Saving factor settings to {}'.format(out_file))
        factors_df.to_csv(out_file)

    def update_factors_from_csv(self, csv_file):
        factors_df = pd.DataFrame.from_csv(csv_file)
        logging.info('Reading factor settings from {}'.format(csv_file))

        for name, factor in self.factors.items():
            logging.info('Updating factor {}'.format(name))

            if issubclass(type(factor), NumericFactor):
                current_low = factors_df.loc[name]['current_low']
                current_high = factors_df.loc[name]['current_high']
                logging.info('Factor: {}. Setting current_low to {}'.format(
                    name, current_low))
                logging.info('Factor: {}. Setting current_high to {}'.format(
                    name, current_high))
                factor.current_low = current_low
                factor.current_high = current_high
            elif isinstance(factor, CategoricalFactor):
                if pd.isnull(factors_df.loc[name]['fixed_value']):
                    fixed_value = None
                    logging.info(
                        'Factor: {}. Had no fixed_value.'.format(name))
                else:
                    fixed_value = factors_df.loc[name]['fixed_value']
                    logging.info(
                        'Factor: {}. Setting fixed_value to {}.'.format(
                            name, fixed_value))

                factor.fixed_value = fixed_value

    def get_optimal_settings(self, response):
        """
        Calculate optimal factor settings given response. Returns calculated
        optimum.

        If the current phase is 'screening': returns the factor settings of
        the best run and updates the current factor settings.

        If the current phase is 'optimization': returns the factor settings of
        the predicted optimum, but doesn't update current factor settings in
        case a validation step is to be run first

        :param pandas.DataFrame response: Response sheet.

        :returns: Calculated optimum.
        :rtype: OptimizationResult
        """

        self._response_values = response.copy()
        response = response.copy()

        # Perform any transformations or weigh together multiple responses:
        treated_response, criterion = self.treat_response(response)

        if self._phase == 'screening':
            # Find the best screening result and update factors accordingly
            self._screening_response = treated_response
            self._screening_criterion = criterion
            return self._evaluate_screening(treated_response, criterion,
                                            self._gsd_span_ratio)
        else:
            # Predict optimal parameter settings, but don't update factors
            return self._predict_optimum_settings(treated_response, criterion)

    def _update_best_experiment(self, result):
        update = False
        if self._best_experiment['optimal_x'].empty:
            update = True
        elif result['criterion'] == 'maximize':
            if result['weighted_response'] > self._best_experiment[
                    'weighted_y']:
                update = True
        elif result['criterion'] == 'minimize':
            if result['weighted_response'] < self._best_experiment[
                    'weighted_y']:
                update = True
        if update:
            self._best_experiment['optimal_x'] = result['factor_settings']
            self._best_experiment['optimal_y'] = result['response']
            self._best_experiment['weighted_y'] = result['weighted_response']

        return update

    def get_best_experiment(self,
                            experimental_sheet,
                            response_sheet,
                            use_index=1):
        """
        Accepts an experimental design and the corresponding response values.

        Finds the best experiment and updates self._best_experiment.

        Returns the best experiment, to be used in fnc update_factors_from_optimum
        """
        assert isinstance(experimental_sheet, pd.core.frame.DataFrame), \
            'The input experimental sheet must be a pandas DataFrame'
        assert isinstance(response_sheet, pd.core.frame.DataFrame), \
            'The input response sheet must be a pandas DataFrame'
        assert sorted(experimental_sheet.columns) == sorted(self.factors), \
            'The factors of the experimental sheet must match those in the \
            pipeline. You input:\n{}\nThey should be:\n{}'                                                          .format(
                list(experimental_sheet.columns),
                list(self.factors.keys()))
        assert sorted(response_sheet.columns) == sorted(self.responses), \
            'The responses of the response sheet must match those in the \
            pipeline. You input:\n{}\nThey should be:\n{}'                                                          .format(
                list(response_sheet.columns),
                list(self.responses.keys()))

        response = response_sheet.copy()
        treated_response, criterion = self.treat_response(
            response, perform_transform=False)

        treated_response = treated_response.iloc[:, 0]
        if criterion == 'maximize':
            optimum_i = treated_response.argsort().iloc[-use_index]
        elif criterion == 'minimize':
            optimum_i = treated_response.argsort().iloc[use_index - 1]
        else:
            raise NotImplementedError

        optimum_settings = experimental_sheet.iloc[optimum_i]

        results = OrderedDict()
        optimal_weighted_response = np.array(treated_response.iloc[optimum_i])
        optimal_response = response_sheet.iloc[optimum_i]
        results['factor_settings'] = optimum_settings
        results['weighted_response'] = optimal_weighted_response
        results['response'] = optimal_response
        results['criterion'] = criterion
        results['new_best'] = False
        results['old_best'] = self._best_experiment

        has_multiple_responses = response_sheet.shape[1] > 1
        logging.debug('The best response was found in experiment:\n{}'.format(
            optimum_settings.name))
        logging.debug('The response values were:\n{}'.format(
            response_sheet.iloc[optimum_i]))
        if has_multiple_responses:
            logging.debug('The weighed response was:\n{}'.format(
                treated_response.iloc[optimum_i]))
        logging.debug('Will return optimum settings:\n{}'.format(
            results['factor_settings']))
        logging.debug('And best response:\n{}'.format(results['response']))

        if self._update_best_experiment(results):
            results['new_best'] = True

        return results

    def update_factors_from_optimum(self,
                                    optimal_experiment,
                                    tol=0.25,
                                    recovery=False):
        """
        Move the numeric factors towards the supplied optimum when it lies
        too far from the current factor centers.

        For every numeric factor the distance between its optimal value and
        its center is expressed relative to its span. Factors whose relative
        distance is at least ``tol`` are updated, unless ``recovery`` is
        set, in which case nothing is updated or logged here.

        :param OrderedDict optimal_experiment: Output from get_best_experiment
        :param float tol: Accepted relative distance to design space edge.
        :param bool recovery: Only numeric entries of the optimum are
            considered and factor updates/logging are skipped.
        :returns: Calculated optimum.
        :rtype: OptimizationResult
        """
        verbose = not recovery

        numeric_mask = np.array(self._factor_types) != 'categorical'
        numeric_names = np.array(list(self.factors.keys()))[numeric_mask]
        numeric_factors = np.array(list(self.factors.values()))[numeric_mask]

        optimal_x = optimal_experiment['factor_settings']
        optimal_y = optimal_experiment['weighted_response']
        criterion = optimal_experiment['criterion']

        if recovery:
            # Keep only the numeric factors of the optimum.
            keep = optimal_x.index.isin(numeric_names)
            optimal_x = optimal_x.iloc[keep]

        centers = np.array([f.center for f in numeric_factors])
        spans = np.array([f.span for f in numeric_factors])
        ratios = (optimal_x - centers) / spans

        if verbose:
            logging.debug(
                'The distance of the factor optimas from the factor centers, '
                'expressed as the ratio of the step length:\n{}'.format(
                    ratios))

        converged = bool((abs(ratios) < tol).all())
        if converged:
            if verbose:
                logging.info('Convergence reached.')
        else:
            if verbose:
                logging.info('Convergence not reached. Moves design.')

            for ratio, name, factor in zip(ratios, numeric_names,
                                           numeric_factors):
                within_tolerance = abs(ratio) < tol
                if not verbose:
                    continue
                if within_tolerance:
                    logging.debug(
                        ('Factor {} not updated - within tolerance '
                         'limits.').format(name))
                else:
                    self._update_numeric_factor(factor, name, ratio)

        # Note: the centers passed on are the ones computed before any
        # factor was updated above.
        converged, reached_limits = self._check_convergence(
            centers,
            converged,
            criterion,
            optimal_y,
            numeric_factors,
            recovery=recovery)

        optimization_results = pd.Series(index=self._design_sheet.columns,
                                         dtype=object)
        for name, factor in self.factors.items():
            optimization_results[name] = (
                factor.fixed_value
                if isinstance(factor, CategoricalFactor) else optimal_x[name])

        return OptimizationResult(optimization_results,
                                  converged,
                                  tol,
                                  reached_limits,
                                  empirically_found=True)

    def _predict_optimum_settings(self, response, criterion):
        """
        Fit a model to the response and locate its optimum.

        Only the non-categorical columns of the design sheet are used for
        the model. In the result, categorical factors are reported with
        their fixed value and ordinal factors are rounded to integers. The
        result is left without values when no optimum is returned.

        :returns: Calculated optimum.
        :rtype: OptimizationResult
        """
        logging.info('Predicting optimum')

        numeric_mask = np.array(self._factor_types) != 'categorical'
        numeric_names = np.array(list(self.factors.keys()))[numeric_mask]

        # Only the optimum itself is used here; the fitted model and its
        # prediction are returned by predict_optimum but not needed.
        optimal_x, _model, _prediction = predict_optimum(
            self._design_sheet.loc[:, numeric_mask],
            response.iloc[:, 0].values,
            numeric_names,
            criterion=criterion,
            n_folds=self.n_folds,
            model_selection=self.model_selection,
            manual_formula=self._formula,
            q2_limit=self.q2_limit)

        optimization_results = pd.Series(index=self._design_sheet.columns,
                                         dtype=object)

        # An empty optimum means Q2 of the model was below the limit or no
        # optimum was found; the result then stays unfilled.
        found_optimum = not optimal_x.empty
        if found_optimum:
            for name, factor in self.factors.items():
                if isinstance(factor, CategoricalFactor):
                    value = factor.fixed_value
                elif isinstance(factor, OrdinalFactor):
                    value = int(np.round(optimal_x[name]))
                else:
                    value = optimal_x[name]
                optimization_results[name] = value

        return OptimizationResult(optimization_results,
                                  converged=False,
                                  tol=0,
                                  reached_limits=False,
                                  empirically_found=False)

    def treat_response(self, response, perform_transform=True):
        """
        Perform any specified transformations on the response.

        If several responses are defined, combine them into one. The geometric
        mean of Derringer and Suich's desirability functions will be used for
        optimization, see:

        Derringer, G., and Suich, R., (1980), "Simultaneous Optimization
        of Several Response Variables," Journal of Quality Technology, 12,
        4, 214-219.

        Returns a single response variable and the associated maximize/minimize
        criterion.
        """

        has_multiple_responses = response.shape[1] > 1
        for name, spec in self.responses.items():
            transform = spec.get('transform', None)
            response_values = response[name]
            if perform_transform:
                if transform == 'log':
                    logging.debug('Log-transforming response {}'.format(name))
                    response_values = np.log(response_values)
                    self._stored_transform = np.log
                elif transform == 'box-cox':
                    response_values, lambda_ = scipy.stats.boxcox(
                        response_values)
                    logging.debug('Box-cox transforming response {} '
                                  '(lambda={:.4f})'.format(name, lambda_))
                    self._stored_transform = _make_stored_boxcox(lambda_)
                else:
                    self._stored_transform = lambda x: x

            if has_multiple_responses:
                desirability_function = self._desirabilites[name]
                response_values = [
                    desirability_function(value) for value in response_values
                ]
            response[name] = response_values

        if has_multiple_responses:
            response = np.power(response.product(axis=1),
                                (1 / response.shape[1]))
            response = response.to_frame('combined_response')
            criterion = 'maximize'

        else:
            criterion = list(self.responses.values())[0]['criterion']

        return response, criterion

    def reevaluate_screening(self):
        if self._screening_response is None:
            raise DesignerError('screening must be run before re-evaluation')

        return self._evaluate_screening(self._screening_response,
                                        self._screening_criterion,
                                        self._gsd_span_ratio,
                                        self._n_screening_evaluations + 1)

    def _validate_new_factor_limits(self, factor, factor_name, low_limit,
                                    high_limit):
        # If the proposed step change takes us below or above min and max:
        logging.debug('Factor {}: Proposed new factor low is {}.'.format(
            factor_name, low_limit))
        logging.debug('Factor {}: Proposed new factor high is {}.'.format(
            factor_name, high_limit))
        adjusted_settings = False
        if low_limit < factor.min:
            nudge = abs(low_limit - factor.min)
            logging.debug(
                'Factor {}: Minimum allowed setting ({}) would be exceeded by '
                'the proposed new factor low.'.format(factor_name, factor.min))
            low_limit += nudge
            high_limit += nudge
            adjusted_settings = True

        elif high_limit > factor.max:
            nudge = abs(high_limit - factor.max)
            logging.debug(
                'Factor {}: Maximum allowed setting ({}) would be exceeded by '
                'the proposed new factor high.'.format(factor_name,
                                                       factor.max))
            low_limit -= nudge
            high_limit -= nudge
            adjusted_settings = True

        if adjusted_settings:
            logging.debug('Factor {}: Adjusted the proposed new factor '
                          'settings by {}.'.format(factor_name, nudge))
            logging.debug('Factor {}: New factor low is {}.'.format(
                factor_name, low_limit))
            logging.debug('Factor {}: New factor high is {}.'.format(
                factor_name, high_limit))

        return (low_limit, high_limit)

    def _evaluate_screening(self,
                            response,
                            criterion,
                            span_ratio,
                            use_index=1):
        """
        Pick a screening run by rank and re-centre every factor around it.

        The run ranked ``use_index`` (1 being the best according to
        ``criterion``) is selected from the first column of ``response``.
        Categorical factors get their fixed value set to the level used in
        that run. All other factors get a new current low/high centred on
        the value used in that run. Finally the best experiment is updated
        and the phase is switched to 'optimization'.

        :param response: Treated response sheet; only its first column is
            ranked.
        :param str criterion: 'maximize' or 'minimize'.
        :param float span_ratio: The ratio of the span between gsd points that will be used in the following optimization design.
        :param int use_index: 1-based rank of the screening run to use.
        :returns: The factor settings of the selected run.
        :rtype: OptimizationResult
        :raises NotImplementedError: For any other criterion.
        """
        self._n_screening_evaluations += 1

        logging.info('Evaluating screening results.')
        response_series = response.iloc[:, 0]
        # NOTE(review): factors are taken in sorted-name order and zipped with
        # the design-matrix row below; presumably the matrix columns follow
        # the same order - confirm against the screening-design code.
        factor_items = sorted(self.factors.items())
        # argsort gives row positions ordered from smallest to largest value.
        if criterion == 'maximize':
            optimum_i = response_series.argsort().iloc[-use_index]
        elif criterion == 'minimize':
            optimum_i = response_series.argsort().iloc[use_index - 1]
        else:
            raise NotImplementedError

        optimum_design_row = self._design_matrix[optimum_i]
        optimum_settings = OrderedDict()

        # Update all factors according to current results. For each factor,
        # the current_high and current_low will be set to factors level above
        # and below the point in the screening design with the best response.
        for factor_level, (name, factor) in zip(optimum_design_row,
                                                factor_items):
            if isinstance(factor, CategoricalFactor):
                factor_levels = np.array(factor.values)
                factor.fixed_value = factor_levels[factor_level]
            else:
                factor_levels = sorted(self._design_sheet[name].unique())

                # Neighbouring levels of the best one, clamped to the first
                # and last available level.
                min_ = factor_levels[max([0, factor_level - 1])]
                max_ = factor_levels[min(
                    [factor_level + 1,
                     len(factor_levels) - 1])]
                span = max_ - min_

                # Shrink the span a bit
                logging.debug('Factor {} span: {}'.format(name, span))
                logging.debug('Factor {}: adjusting span with '
                              'gsd_span_ratio {}'.format(name, span_ratio))
                span = span * span_ratio
                if isinstance(factor, OrdinalFactor) and span < 2.0:
                    # Make sure ordinal factors' spans don't shrink to the
                    # point where there's no spread in the exp. design
                    logging.debug('Factor {}: span ({}) too small, adjusting '
                                  'to minimal span for ordinal factor.'.format(
                                      name, span))
                    span = 2.0
                logging.debug('Factor {} span: {}'.format(name, span))

                # center around best point
                best_point = factor_levels[factor_level]
                new_low = best_point - span / 2
                new_high = best_point + span / 2

                if isinstance(factor, OrdinalFactor):
                    new_low = int(np.round(new_low))
                    new_high = int(np.round(new_high))

                # nudge new high and low so we don't exceed the limits
                new_low, new_high = self._validate_new_factor_limits(
                    factor, name, new_low, new_high)

                # update factors
                factor.current_low = new_low
                factor.current_high = new_high

            optimum_settings[name] = factor_levels[factor_level]
            logging.info('New settings for factor {}:\n{}'.format(
                name, factor))

        results = OptimizationResult(pd.Series(optimum_settings),
                                     converged=False,
                                     tol=0,
                                     reached_limits=False,
                                     empirically_found=True)

        logging.info('Best screening result was exp no {}'.format(optimum_i))
        logging.info('The corresponding response was:\n{}'.format(
            self._response_values.iloc[optimum_i]))
        if len(self._response_values.columns) > 1:
            logging.info('The combined response was:\n{}'.format(
                response.iloc[optimum_i]))
        logging.info('The factor settings were:\n{}'.format(
            results.predicted_optimum))

        # update current best experiment
        # The raw response sheet is passed when there are several responses,
        # the treated sheet otherwise.
        self.get_best_experiment(
            self._design_sheet, self._response_values
            if len(self._response_values.columns) > 1 else response)
        self._phase = 'optimization'
        return results

    def set_phase(self, phase):
        """Set the current phase.

        Parameters
        ----------
        phase : object
            The phase to switch to. Must be a member of
            ``self._allowed_phases``.

        Raises
        ------
        AssertionError
            If `phase` is not in ``self._allowed_phases``.
        """
        # Raise explicitly instead of using an ``assert`` statement: asserts
        # are removed when Python runs with -O, which would let an invalid
        # phase through unchecked. The exception type and message are kept so
        # existing callers see no difference.
        if phase not in self._allowed_phases:
            raise AssertionError('phase must be one of {}'.format(
                self._allowed_phases))
        self._phase = phase

    def _update_numeric_factor(self, factor, name, ratio):
        """Shift and shrink the current low/high window of a numeric factor.

        The window is moved by ``factor.span * step_length`` in the direction
        of the sign of `ratio`, where ``step_length`` is ``self.step_length``
        when that is set and ``abs(ratio)`` otherwise. The window width is
        scaled by ``self.shrinkage``. The proposed limits are handed to
        ``self._validate_new_factor_limits`` and the limits it returns are
        stored on `factor`.

        Parameters
        ----------
        factor : QuantitativeFactor or OrdinalFactor
            Factor updated in place. For ordinal factors the proposed limits
            are rounded with ``np.round`` before validation.
        name : str
            Factor name (presumably a string); used in log messages and
            passed on to the limit validation.
        ratio : float
            Signed value whose sign gives the step direction and whose
            magnitude is the relative step when ``self.step_length`` is None.

        Raises
        ------
        NotImplementedError
            If `factor` is neither a QuantitativeFactor nor an OrdinalFactor.
        """
        logging.info('Factor {}: Updating settings.'.format(name))
        logging.info('Factor {}: Current settings: {}'.format(name, factor))

        if self.step_length is None:
            relative_step = abs(ratio)
        else:
            relative_step = self.step_length
        step = factor.span * relative_step * np.sign(ratio)

        logging.debug(
            'Factor {}: Step by which settings are adjusted is {}.'.format(
                name, step))
        logging.debug(
            'Factor {}: Current span between high and low is {}.'.format(
                name, factor.span))
        logging.debug('Factor {}: Will shrink the span by {}.'.format(
            name, self.shrinkage))
        new_span = factor.span * self.shrinkage
        logging.debug('Factor {}: New span is {}.'.format(name, new_span))

        # Quantitative is tested first so that it wins if a factor happens to
        # be an instance of both classes.
        if isinstance(factor, QuantitativeFactor):
            round_limits = False
        elif isinstance(factor, OrdinalFactor):
            round_limits = True
        else:
            raise NotImplementedError

        shifted_center = factor.center + step
        half_span = new_span / 2
        proposed_low = shifted_center - half_span
        proposed_high = shifted_center + half_span
        if round_limits:
            proposed_low = np.round(proposed_low)
            proposed_high = np.round(proposed_high)

        # The validator is given the chance to adjust the proposed limits
        # before they are stored on the factor.
        new_low, new_high = self._validate_new_factor_limits(
            factor, name, proposed_low, proposed_high)

        factor.current_low = new_low
        factor.current_high = new_high
        logging.info('Factor {}: New settings: {}'.format(name, factor))
        logging.info('Factor {}: Done updating.'.format(name))

    def _new_screening_design(self, reduction='auto'):
        """Build a screening design over the full range of every factor.

        Factors are processed in order of their names. Categorical factors
        contribute ``factor.values`` as levels. All other factors contribute
        ``factor.screening_levels`` values generated between ``factor.min``
        and ``factor.max``; for ordinal factors these are rounded and
        de-duplicated. The level counts are passed to ``pyDOE2.gsd`` and the
        returned index matrix is mapped back to the actual level values.

        The index matrix is stored in ``self._design_matrix`` and the
        resulting sheet in ``self._design_sheet``.

        Parameters
        ----------
        reduction : int or 'auto'
            Reduction argument forwarded to ``pyDOE2.gsd``. With ``'auto'``
            the number of factors is used.

        Returns
        -------
        pandas.DataFrame
            One column per factor (named after the factor) and one row per
            experiment.

        Raises
        ------
        ValueError
            If a non-categorical factor has a non-finite ``min`` or ``max``.
        """
        factor_items = sorted(self.factors.items())

        levels = []
        names = []
        dtypes = []
        for name, factor in factor_items:
            names.append(name)

            if isinstance(factor, CategoricalFactor):
                levels.append(factor.values)
                dtypes.append(object)
                continue

            num_levels = factor.screening_levels
            spacing = getattr(factor, 'screening_spacing', 'linear')
            min_ = factor.min
            max_ = factor.max
            if not np.isfinite([min_, max_]).all():
                raise ValueError(
                    'Can\'t perform screening with unbounded factors')

            # NOTE(review): np.logspace interprets its endpoints as base-10
            # exponents, so non-linear spacing yields values between
            # 10**min and 10**max -- confirm this is the intended meaning of
            # min/max for such factors.
            space = np.linspace if spacing == 'linear' else np.logspace
            values = space(min_, max_, num_levels)

            if isinstance(factor, OrdinalFactor):
                values = sorted(np.unique(np.round(values)))
                dtypes.append(int)
            else:
                dtypes.append(float)

            levels.append(values)

        # Compare by value, not identity: ``is not 'auto'`` depends on string
        # interning and raises a SyntaxWarning on Python 3.8+.
        design_matrix = pyDOE2.gsd(
            [len(values) for values in levels],
            reduction if reduction != 'auto' else len(levels))

        # Translate the level indices of each column into the level values.
        factor_matrix = []
        for i, (values, dtype) in enumerate(zip(levels, dtypes)):
            values = np.array(values)[design_matrix[:, i]]
            series = pd.Series(values, dtype=dtype)
            factor_matrix.append(series)

        self._design_matrix = design_matrix
        self._design_sheet = pd.concat(factor_matrix, axis=1, keys=names)
        return self._design_sheet

    def _new_optimization_design(self):
        """Build the design sheet for an optimization iteration.

        The designer selected by ``self.design_type`` produces a coded design
        matrix for the numeric factors. Each column is scaled by half of the
        factor's span and shifted by the factor's center; columns belonging
        to ordinal factors are rounded. If any resulting value lies outside
        the factors' ``min``/``max`` and ``self._edge_action`` is
        ``'distort'``, the values are capped at those limits. Categorical
        factors are held at their ``fixed_value`` in every row.

        The result is stored in ``self._design_sheet``.

        Returns
        -------
        pandas.DataFrame
            One column per entry of ``self.factors`` (in that order) and one
            row per row of the design matrix.

        Raises
        ------
        NotImplementedError
            If values fall outside the allowed range and
            ``self._edge_action`` is ``'shrink'``.
        """
        matrix_designer = self._matrix_designers[self.design_type.lower()]

        numeric_factors = [(name, factor)
                           for name, factor in self.factors.items()
                           if isinstance(factor, NumericFactor)]
        numeric_factor_names = [name for name, factor in numeric_factors]
        design_matrix = matrix_designer(len(numeric_factors))

        mins = np.array([f.min for _, f in numeric_factors])
        maxes = np.array([f.max for _, f in numeric_factors])
        span = np.array([f.span for _, f in numeric_factors])
        centers = np.array([f.center for _, f in numeric_factors])
        factor_matrix = design_matrix * (span / 2.0) + centers

        # For factors that are specified as ordinal, adjust their values
        # in the design matrix to be rounded floats.
        for i, (factor_name, factor) in enumerate(numeric_factors):
            if isinstance(factor, OrdinalFactor):
                factor_matrix[:, i] = np.round(factor_matrix[:, i])
            logging.debug('Current setting {}: {}'.format(factor_name, factor))

        # Check if current settings are outside allowed design space.
        if (factor_matrix < mins).any() or (factor_matrix > maxes).any():
            # The first literal ends with a space: the two adjacent literals
            # are joined verbatim and previously produced "factorsby".
            logging.warning(
                'Out of design space factors. Adjusts factors '
                'by {}.'.format(self._edge_action + 'ing'))
            if self._edge_action == 'distort':

                # Simply cap out-of-boundary values at mins and maxes.
                capped_mins = np.maximum(factor_matrix, mins)
                capped_mins_and_maxes = np.minimum(capped_mins, maxes)
                factor_matrix = capped_mins_and_maxes

            elif self._edge_action == 'shrink':
                raise NotImplementedError

        factors = []
        for name, factor in self.factors.items():
            if isinstance(factor, CategoricalFactor):
                # Categorical factors are not varied here; repeat the fixed
                # value once per experiment.
                values = np.repeat(factor.fixed_value, len(design_matrix))
                factors.append(pd.Series(values))
            else:
                i = numeric_factor_names.index(name)
                dtype = int if isinstance(factor, OrdinalFactor) else float
                factors.append(pd.Series(factor_matrix[:, i].astype(dtype)))

        self._design_sheet = pd.concat(factors,
                                       axis=1,
                                       keys=self.factors.keys())
        return self._design_sheet

    def _check_convergence(self,
                           centers,
                           converged,
                           criterion,
                           prediction,
                           numeric_factors,
                           recovery=False):
        """Flag convergence when the design did not move since last iteration.

        The predicted optimum may sit at the edge of the allowed min or max
        factor settings. In that case the design space cannot be moved any
        further, so iteration should stop even if the caller did not consider
        the run converged.

        Parameters
        ----------
        centers : array-like
            Factor centers recorded before the factors were updated.
        converged : bool
            Convergence state determined by the caller; returned unchanged
            when the centers have moved.
        criterion : str
            Compared against 'maximize', 'minimize' and 'target'; only used
            when there is exactly one response.
        prediction : number
            Predicted response, compared against 1 for several responses and
            against the transformed limits for a single response.
        numeric_factors : iterable
            Factors whose current ``center`` is compared with `centers`.
        recovery : bool, optional
            When true, the "has not moved" log message is suppressed.

        Returns
        -------
        tuple
            ``(converged, reached_limits)``.
        """
        new_centers = np.array([f.center for f in numeric_factors])
        if not (centers == new_centers).all():
            # The design moved: keep the caller's verdict.
            return converged, False

        if not recovery:
            logging.info(
                'The design has not moved since last iteration. Converged.'
            )

        reached_limits = True
        response_count = len(self.responses)
        if response_count > 1:
            if prediction < 1:
                reached_limits = False
        elif response_count == 1:
            r_spec = next(iter(self.responses.values()))
            # Both limits are transformed up front, falling back to 1 when a
            # limit is not specified.
            low_limit = self._stored_transform(r_spec.get('low_limit', 1))
            high_limit = self._stored_transform(r_spec.get('high_limit', 1))
            has_low = 'low_limit' in r_spec
            has_high = 'high_limit' in r_spec

            if criterion == 'maximize' and has_low:
                reached_limits = prediction >= low_limit
            elif criterion == 'minimize' and has_high:
                reached_limits = prediction <= high_limit
            elif criterion == 'target' and has_low and has_high:
                reached_limits = low_limit <= prediction <= high_limit

        return True, reached_limits
Пример #6
0
def opmsens_write_cases(basefile, header, factors, scenario):
    """ Main Function for Writing out Scenario Cases

    This is the main function that controls the writing out of the various
    requested scenario cases (jobs). The function first calls opmsens_check
    and returns immediately if it reports an error, then calls opmsens_clean
    for the base file. After that, opmsens_write_param and opmsens_write_data
    are called once per job, and finally opmsens_write_queue is called with
    the collected jobs unless one of the write calls reported an error.

    Parameters
    ----------
    basefile : str
        The basefile used to generate all the cases
    header : list
        A list of header names; ``header[1]`` names the factor column
    factors : table
        A table of design factors, with one column per entry of `header`
    scenario : str
        The type of scenario to be generated

    Returns
    -------
    tuple
        Always an empty tuple.
    """

    # Check for Errors and Return if Errors Found
    checkerr = opmsens_check(basefile, header, factors, scenario)
    if checkerr:
        return ()
    #
    # Cleanup Existing Files
    #
    opmsens_clean(basefile)
    #
    # Define Factor and Job Data Frame
    #
    df = pd.DataFrame(factors, columns=header)
    # Blank cells become NaN and any row containing one is dropped.
    df = df[df != ''].dropna()
    jobdf = pd.DataFrame()
    jobdf[header[1]] = df[header[1]]
    for slevel in ['Low', 'Best', 'High']:
        if slevel in scenario:
            jobdf[slevel] = df[slevel]

    nfactor = jobdf.shape[0]
    nlevel = jobdf.shape[1]
    #
    # Write PARAM and DATA Files
    #
    jobs = []
    jobstart = 1
    joberr = False
    jobdata = Path(basefile)
    jobparam = Path(basefile).with_suffix('.param')
    print('Scenario:  ' + scenario + ' Start')
    #
    # Low, Best and High Scenario
    #
    if 'Scenario' in scenario:
        jobnum = 0
        for joblevel in range(1, nlevel):
            (joberr, jobs) = opmsens_write_param(jobstart, jobnum, jobparam,
                                                 jobdata, jobs)
            if joberr:
                break
            joberr = opmsens_write_data(scenario, joblevel, nfactor, jobdf,
                                        jobstart, jobnum, jobdata)
            if joberr:
                break
            jobstart = jobstart + joblevel
    #
    # One Job per Factor
    #
    elif 'One Job per Factor' in scenario:
        for joblevel in range(1, nlevel):
            for jobnum in range(0, nfactor):
                (joberr, jobs) = opmsens_write_param(jobstart, jobnum,
                                                     jobparam, jobdata, jobs)
                if joberr:
                    break
                joberr = opmsens_write_data(scenario, joblevel, nfactor, jobdf,
                                            jobstart, jobnum, jobdata)
                if joberr:
                    break
            # The breaks above only leave the inner loop. Stop the outer loop
            # as well, otherwise later jobs would still be written and could
            # reset joberr, causing the queue to be written after a failure.
            if joberr:
                break
            jobstart = jobstart + nfactor
    #
    # Factorial Low and High Full
    #
    elif 'Factorial' in scenario:
        #
        # Obtain DOE Matrix and Convert to Data Frame
        #
        # NOTE(review): the replace() further down only maps the codes 1, 2
        # and 3. The two-level designs are shifted by +2 to give 1 and 3, but
        # fullfact(...) - 1 and bbdesign(...) appear to produce codes starting
        # at -1 -- confirm the coding of the three-level designs.
        doedata = pd.DataFrame()
        if 'Factorial Low and High Full' in scenario:
            doedata = pyDOE2.ff2n(nfactor) + 2

        if 'Factorial Low and High Plackett-Burman' in scenario:
            doedata = pyDOE2.pbdesign(nfactor) + 2

        if 'Factorial Low, Best and High Full' in scenario:
            doedata = (pyDOE2.fullfact([nlevel - 1] * nfactor)) - 1

        if 'Factorial Low, Best and High Box-Behnken' in scenario:
            doedata = pyDOE2.bbdesign(nfactor)

        # Transpose so that each row is a factor and each column is a run.
        doedf = pd.DataFrame(data=doedata).transpose()
        doedf = doedf.rename(columns=lambda x: 'RUN' + str(x + 1).zfill(3),
                             inplace=False)
        #
        # Set Factor Values
        #
        # Codes 1, 2 and 3 are replaced by the values in columns 2, 3 and 4
        # of the factor table (presumably Low, Best and High -- verify
        # against the header layout used by the caller).
        for n in range(0, nfactor):
            doedf.iloc[n, :] = doedf.iloc[n, :].replace(
                [1.0, 2.0, 3.0], [df.iloc[n, 2], df.iloc[n, 3], df.iloc[n, 4]])
        #
        # Merge Data Frames and Write Out Files
        #
        jobdf = pd.DataFrame()
        jobdf[header[1]] = df[header[1]]
        jobdf = pd.concat([jobdf, doedf], axis=1)
        nfactor = jobdf.shape[0]
        nlevel = jobdf.shape[1]
        jobstart = 0
        for joblevel in range(1, nlevel):
            jobnum = joblevel
            (joberr, jobs) = opmsens_write_param(jobstart, jobnum, jobparam,
                                                 jobdata, jobs)
            if joberr:
                break
            joberr = opmsens_write_data(scenario, joblevel, nfactor, jobdf,
                                        jobstart, jobnum, jobdata)
            if joberr:
                break

    print('Scenario:  ' + scenario + ' End')
    if not joberr:
        print('WriteQueu: Start')
        opmsens_write_queue(jobs)
        print('WriteQueu: End')

    return ()