示例#1
0
文件: simplex.py 项目: nberkel05/q2mm
    def run(self, r_data=None):
        """
        Optimize the force field parameters with the simplex method.

        Once all attributes are set up as desired, run this method. The
        starting force field (`self.ff`) is scored if it has no score yet.
        A set of forward differentiated trial force fields is then built
        (restricted to `self.max_params` parameters when the force field has
        more than that), and reflection/expansion/contraction cycles are
        repeated until `self.max_cycles` is reached or no improvement has
        been seen for `self._max_cycles_wo_change` cycles. The best force
        field found is exported before it is returned.

        Parameters
        ----------
        r_data : optional
            Reference data passed to `compare.compare_data`. When None, it
            is obtained from `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.

        Raises
        ------
        opt.OptError
            If weighted reflection is enabled and the score differences
            between the trial force fields and the worst one sum to zero.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)

        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # Could store data on self.ff.data if we wanted. Not necessary for
            # simplex. If simplex yielded no improvements, it would return this
            # FF, and then we might want the data such that we don't have to
            # recalculate it in gradient. Let's hope simplex generally yields
            # improvements.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
        else:
            logger.log(20, '  -- Reused existing score and data for initial FF.')

        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        if self.max_params and len(self.ff.params) > self.max_params:
            logger.log(20, '  -- More parameters than the maximum allowed.')
            logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
            logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
            # Here we select the parameters that have the lowest 2nd
            # derivatives.

            # THIS IS SCHEDULED FOR CHANGING. THIS IS ACTUALLY NOT A GOOD
            # CRITERION FOR PARAMETER SELECTION.
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Differentiate all parameters forward. Yes, I know this is
                # counter-intuitive because we are going to only use subset of
                # the forward differentiated FFs. However, this is very
                # computationally inexpensive because we're not scoring them
                # now. We will remove the forward differentiated FFs we don't
                # want before scoring.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                # Do central differentiation so we can calculate derivatives.
                # Another option would be to write code to determine
                # derivatives only from forward differentiation.
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                # Add the derivatives to your original FF.
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
                logger.log(5, '  -- Keeping {} forward differentiated '
                           'FFs.'.format(len(ffs)))

            # This sorts the parameters based upon their 2nd derivative.
            # It keeps the ones with lowest 2nd derivatives.

            # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
            params = select_simp_params_on_derivs(
                self.ff.params, max_params=self.max_params)
            # From the entire list of forward differentiated FFs, pick
            # out the ones that have the lowest 2nd derivatives.
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))

            # Reduce number of parameters.
            # Will need an option that's not MM3* specific in the future.
            # Match on (row, column) pairs. Testing rows and columns
            # independently would also keep unselected parameters that merely
            # share a row with one selected parameter and a column with
            # another.
            selected = set((x.mm3_row, x.mm3_col) for x in params)
            for ff in self.new_ffs:
                ff.params = [param for param in ff.params
                             if (param.mm3_row, param.mm3_col) in selected]
            # Make a copy of your original FF that has less parameters. The
            # parameters must come from the copy itself so that it keeps the
            # original values and shares no parameter objects with the trial
            # FFs.
            ff_copy = copy.deepcopy(self.ff)
            ff_copy.params = [
                param for param in ff_copy.params
                if (param.mm3_row, param.mm3_col) in selected]
        else:
            # In this case it's simple. Just forward differentiate each
            # parameter.
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
            logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
            # Still make that FF copy.
            ff_copy = copy.deepcopy(self.ff)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Add your copy of the original FF to the forward differentiated FFs.
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        # Allow 3 cycles w/o change for each parameter present. Remember that
        # the initial FF was added here, hence the minus one.
        self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
        wrapper = textwrap.TextWrapper(width=79)
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        # Bind the best FF up front so it is defined even when the loop below
        # never executes (e.g. max_cycles is zero).
        best_ff = self.new_ffs[0]
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self._max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
            logger.log(20, 'ORDERED FF SCORES:')
            logger.log(20, wrapper.fill('{}'.format(
                    ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))

            # !!! FOR TESTING !!!

            # Write the best and worst FFs to some other directory. Then
            # write the worst FF to optimization working directory. Then
            # raise opt.OptError. The worst FF should be overwritten by
            # the best FF afterwards.

            # if current_cycle == 5:
            #     self.new_ffs[-1].export_ff(
            #         path='ref_methanol_flds/mm3_worst.fld',
            #         lines=self.ff.lines)
            #     self.new_ffs[0].export_ff(
            #         path='ref_methanol_flds/mm3_best.fld',
            #         lines=self.ff.lines)
            #     self.new_ffs[-1].export_ff(
            #         path='ref_methanol/mm3.fld',
            #         lines=self.ff.lines)
            #     raise opt.OptError

            # !!! END TESTING !!!

            # The inversion and reflection FFs start as copies of the best
            # FF's parameters; their values are overwritten below.
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            # Need score difference sum for weighted inversion.
            # Calculate this value before going into loop.
            if self.do_weighted_reflection:
                # If zero, should break.
                score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                      for x in self.new_ffs[:-1]])
                if score_diff_sum == 0.:
                    logger.warning(
                        'No difference between force field scores. '
                        'Exiting simplex.')
                    # We want to raise opt.OptError such that
                    # opt.catch_run_errors will write the best FF obtained thus
                    # far.
                    raise opt.OptError(
                        'No difference between force field scores. '
                        'Exiting simplex.')
            for i in xrange(0, len(best_ff.params)):
                if self.do_weighted_reflection:
                    # Average over all but the worst FF, weighted by each
                    # FF's score difference to the worst FF.
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - self.new_ffs[-1].score)
                             for x in self.new_ffs[:-1]])
                        / score_diff_sum)
                else:
                    # Plain average over all but the worst FF.
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]])
                        /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - self.new_ffs[-1].params[i].value)
            # The inversion point does not need to be scored.
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20, '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        # Reflection was worse than the worst FF: contract
                        # between the inversion point and the worst FF.
                        con_val = (
                            (inv_ff.params[i].value +
                             self.new_ffs[-1].params[i].value) / 2)
                    else:
                        # Otherwise contract on the reflected side.
                        con_val = (
                            (3 * inv_ff.params[i].value -
                             self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                # This change was made to reflect the 1998 Q2MM publication.
                # if con_ff.score < self.new_ffs[-1].score:
                if con_ff.score < self.new_ffs[-2].score:
                    logger.log(20, '  -- Contraction succeeded.')
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF except the best halfway towards the best
                    # FF and rescore it.
                    for ff in self.new_ffs[1:]:
                        for i in xrange(0, len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(
                        20, '  -- Contraction failed. Keeping parmaeters '
                        'anyway.')
                    self.new_ffs[-1] = con_ff
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(20, '  -- {} cycles without improvement out of {} '
                           'allowed.'.format(
                        cycles_wo_change, self._max_cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
                    79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
示例#2
0
def reduce_num_simp_ffs(ffs, params):
    """
    Reduce a list of differentiated force fields to the simplex trial set.

    The forward differentiated force fields are extracted first, and that
    reduced list is then filtered by the given parameters. Previously the
    result of the first step was discarded and the second step ran on the
    full input list.

    Parameters
    ----------
    ffs : list
        Differentiated force fields, as accepted by `opt.extract_forward`.
    params : list
        Parameters used by `opt.extract_ff_by_params` to pick force fields.

    Returns
    -------
    The result of `opt.extract_ff_by_params` applied to the forward
    differentiated force fields.
    """
    simp_ffs = opt.extract_forward(ffs)
    # Filter the forward differentiated FFs, not the original list.
    simp_ffs = opt.extract_ff_by_params(simp_ffs, params)
    return simp_ffs
示例#3
0
文件: simplex.py 项目: peonor/q2mm
    def run(self, r_data=None):
        """
        Optimize the force field parameters with the simplex method.

        Once all attributes are set up as desired, run this method. The
        starting force field (`self.ff`) is scored if it has no score yet.
        A set of forward differentiated trial force fields is then built
        (restricted to `self.max_params` parameters when the force field has
        more than that), and reflection/expansion/contraction cycles are
        repeated until `self.max_cycles` is reached or no improvement has
        been seen for `self.max_cycles_wo_change` cycles. With weighted
        reflection, the cycles also stop early when the score differences
        between the trial force fields sum to zero. The best force field
        found is exported before it is returned.

        Parameters
        ----------
        r_data : optional
            Reference data passed to `compare.compare_data`. When None, it
            is obtained from `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)
        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        # Here we don't actually need the database connection/force field data.
        # We only need the score.
        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # I could store this object on self.ff to prevent garbage
            # collection. Would be nice if simplex was followed by gradient,
            # which needs that information, and if simplex yielded no
            # improvements. At most points in the optimization, this is probably
            # too infrequent for it to be worth the memory, but it might be nice
            # once the parameters are close to convergence.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
            logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
        else:
            logger.log(15,
                       '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
        # (row, column) pairs of the selected parameters. Stays None when the
        # parameter set is not reduced.
        selected = None
        if self.max_params and len(self.ff.params) > self.max_params:
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Don't score so this really doesn't take much time.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
            params = select_simp_params_on_derivs(self.ff.params,
                                                  max_params=self.max_params)
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            # Reduce number of parameters.
            # Will need an option that's not MM3* specific.
            # Match on (row, column) pairs. Testing rows and columns
            # independently would also keep unselected parameters that merely
            # share a row with one selected parameter and a column with
            # another.
            selected = set((x.mm3_row, x.mm3_col) for x in params)
            for ff in self.new_ffs:
                ff.params = [param for param in ff.params
                             if (param.mm3_row, param.mm3_col) in selected]
        else:
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Copy the original FF into the simplex. Its parameters are reduced
        # only when the trial FFs were reduced too, and they are taken from
        # the copy itself so it keeps the original values and shares no
        # parameter objects with the trial FFs.
        ff_copy = copy.deepcopy(self.ff)
        if selected is not None:
            ff_copy.params = [
                param for param in ff_copy.params
                if (param.mm3_row, param.mm3_col) in selected]
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        wrapper = textwrap.TextWrapper(width=79)
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(
            20,
            wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score)
                                              for x in self.new_ffs))))
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        # Bind the best FF up front so it is defined even when the loop below
        # never executes (e.g. max_cycles is zero).
        best_ff = self.new_ffs[0]
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self.max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(
                20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
            # The inversion and reflection FFs start as copies of the best
            # FF's parameters; their values are overwritten below.
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            if self.do_weighted_reflection:
                # The weights' sum does not depend on the parameter index, so
                # compute it once. Checking it here lets the break below
                # leave the while loop instead of only the parameter loop.
                score_diff_sum = sum([
                    x.score - self.new_ffs[-1].score
                    for x in self.new_ffs[:-1]
                ])
                if score_diff_sum == 0:
                    logger.warning(
                        'Attempted to divide by zero while calculating the '
                        'weighted simplex inversion point. All penalty '
                        'function scores for the trial force fields are '
                        'numerically equivalent.')
                    # Exit the while loop. This still gives you the best
                    # force field determined thus far.
                    break
            for i in xrange(0, len(best_ff.params)):
                if self.do_weighted_reflection:
                    # Average over all but the worst FF, weighted by each
                    # FF's score difference to the worst FF.
                    inv_val = (sum([
                        x.params[i].value *
                        (x.score - self.new_ffs[-1].score)
                        for x in self.new_ffs[:-1]
                    ]) / score_diff_sum)
                else:
                    # Plain average over all but the worst FF.
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]]) /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (2 * inv_val -
                                          self.new_ffs[-1].params[i].value)
            # Calculate score for inverted parameters.
            self.ff.export_ff(self.ff.path, params=inv_ff.params)
            data = calculate.main(self.args_ff)
            inv_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(inv_ff)
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20,
                        '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        # Reflection was worse than the worst FF: contract
                        # between the inversion point and the worst FF.
                        con_val = ((inv_ff.params[i].value +
                                    self.new_ffs[-1].params[i].value) / 2)
                    else:
                        # Otherwise contract on the reflected side.
                        con_val = ((3 * inv_ff.params[i].value -
                                    self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                if con_ff.score < self.new_ffs[-2].score:
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF except the best halfway towards the best
                    # FF and rescore it.
                    for ff in self.new_ffs[1:]:
                        for i in xrange(0, len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    # The simplex is left unchanged for this cycle.
                    logger.log(20, '  -- Contraction failed.')
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(
                    20,
                    '  -- {} cycles without change.'.format(cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(
                20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(
                20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(
                20,
                '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
示例#4
0
文件: simplex.py 项目: peonor/q2mm
def reduce_num_simp_ffs(ffs, params):
    """
    Reduce a list of differentiated FFs to the ones wanted for simplex.

    The FFs are first passed through `opt.extract_forward` and the result is
    then passed through `opt.extract_ff_by_params` together with `params`.

    Parameters
    ----------
    ffs : list
        Differentiated force fields, handed to `opt.extract_forward`.
    params : list
        Selected parameters, handed to `opt.extract_ff_by_params`.

    Returns
    -------
    list
        Whatever `opt.extract_ff_by_params` returns for the forward
        differentiated FFs and `params`.
    """
    # Chain the two filters. Passing the unfiltered `ffs` to the second call
    # would throw away the result of the first one.
    forward_ffs = opt.extract_forward(ffs)
    return opt.extract_ff_by_params(forward_ffs, params)
示例#5
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Scores the initial FF if it has no score yet, builds the starting set
        of trial FFs by forward differentiation (restricted to the parameters
        chosen by `select_simp_params_on_derivs` when `self.max_params` is set
        and exceeded), and then cycles through reflection, expansion,
        contraction and, optionally, massive contraction. Cycling stops after
        `self.max_cycles` cycles or after `self._max_cycles_wo_change` cycles
        in a row without a better best score.

        Parameters
        ----------
        r_data : optional
            Reference data. Loaded with `opt.return_ref_data(self.args_ref)`
            when None.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.

        Raises
        ------
        `opt.OptError`
            If weighted reflection is on and the score differences between
            the trial FFs sum to zero.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)

        def calc_score():
            # Runs the calculation for whatever FF was exported last and
            # compares the result to the reference data.
            data = calculate.main(self.args_ff)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            return compare.compare_data(r_dict, c_dict)

        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # Could store data on self.ff.data if we wanted. Not necessary for
            # simplex. If simplex yielded no improvements, it would return this
            # FF, and then we might want the data such that we don't have to
            # recalculate it in gradient. Let's hope simplex generally yields
            # improvements.
            self.ff.score = calc_score()
        else:
            logger.log(20,
                       '  -- Reused existing score and data for initial FF.')

        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        # Here's what we do if there are too many parameters.
        if self.max_params and len(self.ff.params) > self.max_params:
            logger.log(20, '  -- More parameters than the maximum allowed.')
            logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
            logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
            # Here we select the parameters that have the lowest 2nd
            # derivatives.

            # Checking only the first parameter's derivative could fail when
            # simplex finds improvements but restores other parameters, so
            # every parameter is checked.
            if None in [x.d1 for x in self.ff.params]:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                # Do central differentiation so we can calculate derivatives.
                # Another option would be to write code to determine
                # derivatives only from forward differentiation.
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    ff.score = calc_score()
                    opt.pretty_ff_results(ff)
                # Add the derivatives to your original FF.
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
                logger.log(
                    5, '  -- Keeping {} forward differentiated '
                    'FFs.'.format(len(ffs)))
            else:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Differentiate all parameters forward. Yes, I know this is
                # counter-intuitive because we are going to only use subset of
                # the forward differentiated FFs. However, this is very
                # computationally inexpensive because we're not scoring them
                # now. We will remove the forward differentiated FFs we don't
                # want before scoring.
                ffs = opt.differentiate_ff(self.ff, central=False)

            # This sorts the parameters based upon their 2nd derivative.
            # It keeps the ones with lowest 2nd derivatives.

            # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
            params = select_simp_params_on_derivs(self.ff.params,
                                                  max_params=self.max_params)
            # From the entire list of forward differentiated FFs, pick
            # out the ones that have the lowest 2nd derivatives.
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            logger.log(1,
                       '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))

            # Reduce number of parameters.
            # Will need an option that's not MM3* specific in the future.
            # Match on (row, column) pairs. Testing rows and columns
            # independently would also keep an unselected parameter that
            # shares its row with one selected parameter and its column with
            # another.
            selected = [(x.mm3_row, x.mm3_col) for x in params]
            for ff in self.new_ffs:
                ff.params = [x for x in ff.params
                             if (x.mm3_row, x.mm3_col) in selected]
            # Make a copy of your original FF that has less parameters.
            # The copy's own parameters are filtered. Taking them from one of
            # the trial FFs would make the copy share parameter objects with
            # that trial FF and pick up its stepped value.
            ff_copy = copy.deepcopy(self.ff)
            ff_copy.params = [x for x in ff_copy.params
                              if (x.mm3_row, x.mm3_col) in selected]
        else:
            # In this case it's simple. Just forward differentiate each
            # parameter.
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
            logger.log(1,
                       '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
            # Still make that FF copy.
            ff_copy = copy.deepcopy(self.ff)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                ff.score = calc_score()
                opt.pretty_ff_results(ff)

        # Add your copy of the original FF to the forward differentiated FFs.
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        # Allow 3 cycles w/o change for each parameter present. Remember that
        # the initial FF was added here, hence the minus one.
        self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
        wrapper = textwrap.TextWrapper(width=79)
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)

        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self._max_cycles_wo_change:
            current_cycle += 1

            # Save the last best in case some accidental sort goes on.
            # Plus it makes reading the code a little easier.
            last_best_ff = copy.deepcopy(self.new_ffs[0])
            logger.log(
                20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
            logger.log(20, 'ORDERED FF SCORES:')
            logger.log(
                20,
                wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score)
                                                  for x in self.new_ffs))))

            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(last_best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(last_best_ff.params)
            # Need score difference sum for weighted inversion.
            # Calculate this value before going into loop.
            if self.do_weighted_reflection:
                # If zero, should break.
                score_diff_sum = sum([
                    x.score - self.new_ffs[-1].score for x in self.new_ffs[:-1]
                ])
                if score_diff_sum == 0.:
                    logger.warning('No difference between force field scores. '
                                   'Exiting simplex.')
                    # We want to raise opt.OptError such that
                    # opt.catch_run_errors will write the best FF obtained thus
                    # far.
                    raise opt.OptError(
                        'No difference between force field scores. '
                        'Exiting simplex.')
            # The inversion point is the (optionally score weighted) average
            # of every FF but the worst. The reflection mirrors the worst FF
            # through that point.
            for i in range(0, len(last_best_ff.params)):
                if self.do_weighted_reflection:
                    inv_val = (sum([
                        x.params[i].value * (x.score - self.new_ffs[-1].score)
                        for x in self.new_ffs[:-1]
                    ]) / score_diff_sum)
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]]) /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (2 * inv_val -
                                          self.new_ffs[-1].params[i].value)
            # The inversion point does not need to be scored.
            # Calculate score for reflected parameters.
            ref_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
            ref_ff.score = calc_score()
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < last_best_ff.score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(last_best_ff.params)
                for i in range(0, len(last_best_ff.params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                exp_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
                exp_ff.score = calc_score()
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20,
                        '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(last_best_ff.params)
                for i in range(0, len(last_best_ff.params)):
                    # Contract on the side of the worst FF when the
                    # reflection scored worse than it, otherwise on the side
                    # of the reflection.
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = ((inv_ff.params[i].value +
                                    self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = ((3 * inv_ff.params[i].value -
                                    self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(params=con_ff.params)
                con_ff.score = calc_score()
                opt.pretty_ff_results(con_ff)
                # This change was made to reflect the 1998 Q2MM publication.
                # The contraction is compared to the second worst FF rather
                # than the worst one.
                if con_ff.score < self.new_ffs[-2].score:
                    logger.log(20, '  -- Contraction succeeded.')
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF but the best halfway towards the best and
                    # score it again.
                    for ff in self.new_ffs[1:]:
                        for i in range(0, len(last_best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(params=ff.params)
                        ff.score = calc_score()
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(
                        20, '  -- Contraction failed. Keeping parmaeters '
                        'anyway.')
                    self.new_ffs[-1] = con_ff
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Keep track of the number of cycles without change. If there's
            # improvement, reset the counter.
            if self.new_ffs[0].score < last_best_ff.score:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(
                    20, '  -- {} cycles without improvement out of {} '
                    'allowed.'.format(cycles_wo_change,
                                      self._max_cycles_wo_change))
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(
                20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))

        # This sort is likely unnecessary because it should be done at the end
        # of the last loop cycle, but I put it here just in case.
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        best_ff = self.new_ffs[0]
        if best_ff.score < self.ff.score:
            logger.log(
                20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(
                20,
                '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
            # This restores the initial parameters, so no need to use
            # restore_simp_ff here.
            best_ff = self.ff
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
示例#6
0
文件: simplex.py 项目: peonor/q2mm
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Scores the initial FF if it has no score yet, builds the starting set
        of trial FFs by forward differentiation (restricted to the parameters
        chosen by `select_simp_params_on_derivs` when `self.max_params` is set
        and exceeded), and then cycles through reflection, expansion,
        contraction and, optionally, massive contraction. Cycling stops after
        `self.max_cycles` cycles, after `self.max_cycles_wo_change` cycles in
        a row without a better best score, or when weighted reflection is on
        and all score differences sum to zero.

        Parameters
        ----------
        r_data : optional
            Reference data. Loaded with `opt.return_ref_data(self.args_ref)`
            when None.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)
        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        # Here we don't actually need the database connection/force field data.
        # We only need the score.
        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # I could store this object on self.ff to prevent garbage
            # collection. Would be nice if simplex was followed by gradient,
            # which needs that information, and if simplex yielded no
            # improvements. At most points in the optimization, this is probably
            # too infrequent for it to be worth the memory, but it might be nice
            # once the parameters are close to convergence.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
            logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
        else:
            logger.log(15, '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
        # (row, column) pairs of the selected parameters. Stays None when the
        # number of parameters is not reduced.
        selected = None
        if self.max_params and len(self.ff.params) > self.max_params:
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Don't score so this really doesn't take much time.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
            params = select_simp_params_on_derivs(
                self.ff.params, max_params=self.max_params)
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            # Reduce number of parameters.
            # Will need an option that's not MM3* specific.
            # Match on (row, column) pairs. Testing rows and columns
            # independently would also keep an unselected parameter that
            # shares its row with one selected parameter and its column with
            # another.
            selected = [(x.mm3_row, x.mm3_col) for x in params]
            for ff in self.new_ffs:
                ff.params = [x for x in ff.params
                             if (x.mm3_row, x.mm3_col) in selected]
        else:
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Copy of the original FF to go along with the trial FFs. Its own
        # parameters are filtered, and only when a selection was made above.
        ff_copy = copy.deepcopy(self.ff)
        if selected is not None:
            ff_copy.params = [x for x in ff_copy.params
                              if (x.mm3_row, x.mm3_col) in selected]
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        wrapper = textwrap.TextWrapper(width=79)
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
                ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        # Defined up front so the code after the loop still works when the
        # loop body never runs.
        best_ff = self.new_ffs[0]
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self.max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            if self.do_weighted_reflection:
                # The denominator of the weighted inversion point is the same
                # for every parameter, so check it once, here, where a break
                # leaves the while loop.
                score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                      for x in self.new_ffs[:-1]])
                if score_diff_sum == 0:
                    logger.warning(
                        'Attempted to divide by zero while calculating the '
                        'weighted simplex inversion point. All penalty '
                        'function scores for the trial force fields are '
                        'numerically equivalent.')
                    # Exits the while loop. Still gives you the best force
                    # field determined thus far.
                    break
            # The inversion point is the (optionally score weighted) average
            # of every FF but the worst. The reflection mirrors the worst FF
            # through that point.
            for i in range(0, len(best_ff.params)):
                if self.do_weighted_reflection:
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - self.new_ffs[-1].score)
                             for x in self.new_ffs[:-1]])
                        /
                        score_diff_sum)
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]])
                        /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - self.new_ffs[-1].params[i].value)
            # Calculate score for inverted parameters.
            self.ff.export_ff(self.ff.path, params=inv_ff.params)
            data = calculate.main(self.args_ff)
            inv_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(inv_ff)
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in range(0, len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                # Keyword argument, like every other export in this method.
                self.ff.export_ff(self.ff.path, params=exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20, '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in range(0, len(best_ff.params)):
                    # Contract on the side of the worst FF when the
                    # reflection scored worse than it, otherwise on the side
                    # of the reflection.
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = (
                            (inv_ff.params[i].value +
                             self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = (
                            (3 * inv_ff.params[i].value -
                             self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                if con_ff.score < self.new_ffs[-2].score:
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF but the best halfway towards the best and
                    # score it again.
                    for ff in self.new_ffs[1:]:
                        for i in range(0, len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(20, '  -- Contraction failed.')
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Reset the counter on improvement, otherwise count the cycle.
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(20, '  -- {} cycles without change.'.format(
                        cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
                    79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff