Пример #1
0
    def run(self, r_data=None):
        """
        Run the simplex optimization and return the best force field found.

        Set up all attributes as desired before calling this method.

        Parameters
        ----------
        r_data : optional
            Reference data to score against. When None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.

        Raises
        ------
        opt.OptError
            If weighted reflection is enabled and the score differences
            relative to the worst FF sum to zero.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)

        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # The data is not stored on self.ff because simplex only needs the
            # score. If simplex yields no improvement and gradient follows,
            # the data will have to be recalculated there.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
        else:
            logger.log(20, '  -- Reused existing score and data for initial FF.')

        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        if self.max_params and len(self.ff.params) > self.max_params:
            logger.log(20, '  -- More parameters than the maximum allowed.')
            logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
            logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
            # Here we select the parameters that have the lowest 2nd
            # derivatives.

            # THIS IS SCHEDULED FOR CHANGING. THIS IS ACTUALLY NOT A GOOD
            # CRITERION FOR PARAMETER SELECTION.

            # NOTE(review): this is a truthiness test, so a first derivative
            # of exactly 0 is treated like a missing one -- confirm intended.
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Differentiate all parameters forward even though only a
                # subset will be used. This is cheap because nothing is scored
                # here; the unwanted FFs are dropped before scoring.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                # Do central differentiation so we can calculate derivatives.
                # Another option would be to write code to determine
                # derivatives only from forward differentiation.
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                # Add the derivatives to your original FF.
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
                logger.log(5, '  -- Keeping {} forward differentiated '
                           'FFs.'.format(len(ffs)))

            # This sorts the parameters based upon their 2nd derivative.
            # It keeps the ones with lowest 2nd derivatives.

            # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
            params = select_simp_params_on_derivs(
                self.ff.params, max_params=self.max_params)
            # From the entire list of forward differentiated FFs, pick
            # out the ones that have the lowest 2nd derivatives.
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))

            # Reduce number of parameters.
            # Will need an option that's not MM3* specific in the future.
            ff_rows = [x.mm3_row for x in params]
            ff_cols = [x.mm3_col for x in params]

            def keep_selected(candidates):
                # NOTE(review): rows and columns are matched independently,
                # so a parameter whose row and column each belong to
                # *different* selected parameters is kept as well.
                return [param for param in candidates
                        if param.mm3_row in ff_rows
                        and param.mm3_col in ff_cols]

            for ff in self.new_ffs:
                ff.params = keep_selected(ff.params)
            # Make a copy of your original FF that has less parameters. The
            # copy's own parameters must be filtered here; filtering the
            # parameters of the last differentiated FF instead would make the
            # copy share (perturbed) parameter objects with that FF.
            ff_copy = copy.deepcopy(self.ff)
            ff_copy.params = keep_selected(ff_copy.params)
        else:
            # In this case it's simple. Just forward differentiate each
            # parameter.
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
            logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
            # Still make that FF copy.
            ff_copy = copy.deepcopy(self.ff)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Add your copy of the original FF to the forward differentiated FFs.
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        # Allow 3 cycles w/o change for each parameter present. Remember that
        # the initial FF was added here, hence the minus one.
        self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
        wrapper = textwrap.TextWrapper(width=79)
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        # Defined up front so the summary after the loop still works when no
        # cycle is executed (e.g. max_cycles is 0).
        best_ff = self.new_ffs[0]
        while (current_cycle < self.max_cycles
               and cycles_wo_change < self._max_cycles_wo_change):
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            # Read-only aliases for the current worst vertex and for every
            # vertex except the worst. Replacements below are always written
            # to self.new_ffs[-1].
            worst_ff = self.new_ffs[-1]
            others = self.new_ffs[:-1]
            num_params = len(best_ff.params)
            logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
            logger.log(20, 'ORDERED FF SCORES:')
            logger.log(20, wrapper.fill('{}'.format(
                    ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))

            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            # Need score difference sum for weighted inversion.
            # Calculate this value before going into loop.
            if self.do_weighted_reflection:
                score_diff_sum = sum([x.score - worst_ff.score
                                      for x in others])
                if score_diff_sum == 0.:
                    logger.warning(
                        'No difference between force field scores. '
                        'Exiting simplex.')
                    # We want to raise opt.OptError such that
                    # opt.catch_run_errors will write the best FF obtained thus
                    # far.
                    raise opt.OptError(
                        'No difference between force field scores. '
                        'Exiting simplex.')
            for i in range(num_params):
                if self.do_weighted_reflection:
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - worst_ff.score)
                             for x in others])
                        / score_diff_sum)
                else:
                    inv_val = (
                        sum([x.params[i].value for x in others])
                        /
                        len(others))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - worst_ff.params[i].value)
            # The inversion point does not need to be scored.
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < best_ff.score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in range(num_params):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * worst_ff.params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20, '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                # The contraction formula depends on whether the reflected
                # point scored worse than the current worst vertex.
                reflection_is_worse = ref_ff.score > worst_ff.score
                for i in range(num_params):
                    if reflection_is_worse:
                        con_val = (
                            (inv_ff.params[i].value +
                             worst_ff.params[i].value) / 2)
                    else:
                        con_val = (
                            (3 * inv_ff.params[i].value -
                             worst_ff.params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                # Compared against the second worst FF (rather than the worst)
                # to reflect the 1998 Q2MM publication.
                if con_ff.score < self.new_ffs[-2].score:
                    logger.log(20, '  -- Contraction succeeded.')
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF except the best halfway towards the best
                    # one and rescore it.
                    for ff in self.new_ffs[1:]:
                        for i in range(num_params):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 best_ff.params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(
                        20, '  -- Contraction failed. Keeping parmaeters '
                        'anyway.')
                    self.new_ffs[-1] = con_ff
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(20, '  -- {} cycles without improvement out of {} '
                           'allowed.'.format(
                        cycles_wo_change, self._max_cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
                    79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Пример #2
0
    def run(self, ref_data=None, restart=None):
        """
        Run the gradient optimization and return the best force field.

        Ensure that the attributes in __init__ are set as you desire before
        using this function.

        Parameters
        ----------
        ref_data : optional
            Reference data. When None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.
        restart : optional
            Name of an existing central differentiation file inside
            `self.direc`. When given, parameters are not differentiated or
            scored again, parameter derivatives are not recomputed, and the
            Newton-Raphson step is skipped.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters. This is the best trial FF if it
            scored lower than the initial FF, otherwise the initial FF.
        """
        # We need reference data if you didn't provide it.
        if ref_data is None:
            ref_data = opt.return_ref_data(self.args_ref)

        # We need the initial FF data.
        if self.ff.data is None:
            logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
            # Is opt.Optimizer.ff_lines used anymore?
            self.ff.export_ff()
            self.ff.data = calculate.main(self.args_ff)
            # Not 100% sure if this is necessary, but it certainly doesn't hurt.
            compare.correlate_energies(ref_data, self.ff.data)
        r_dict = compare.data_by_type(ref_data)
        c_dict = compare.data_by_type(self.ff.data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        if self.ff.score is None:
            # Already zeroed reference and correlated the energies.
            self.ff.score = compare.compare_data(r_dict, c_dict)
        # Fixed, sorted order of data types so that every row written to the
        # differentiation file and the residual vector line up.
        data_types = sorted(r_dict)
        logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
        if restart:
            par_file = restart
            logger.log(
                20, '  -- Restarting gradient from central '
                'differentiation file {}.'.format(par_file))
        else:
            # We need a file to hold the differentiated parameter data. Use
            # the number following the highest existing par_diff_NNN.txt.
            par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
            if par_files:
                par_files.sort()
                # basename (rather than splitting on '/') also works where
                # os.path.join uses a different separator.
                most_recent_par_file = os.path.basename(par_files[-1])
                # Characters 9-11 of 'par_diff_NNN.txt' hold the number.
                num = int(most_recent_par_file[9:12]) + 1
                par_file = 'par_diff_{:03d}.txt'.format(num)
            else:
                par_file = 'par_diff_001.txt'
            logger.log(
                20, '  -- Generating central differentiation '
                'file {}.'.format(par_file))
            # The with-block guarantees the file is closed even if one of the
            # calculations below raises.
            with open(os.path.join(self.direc, par_file), 'w') as f:
                csv_writer = csv.writer(f)
                # Row 1 - Labels
                # Row 2 - Weights
                # Row 3 - Reference data values
                # Row 4 - Initial FF data values
                header_rows = [[], [], [], []]
                for data_type in data_types:
                    header_rows[0].extend([x.lbl for x in r_dict[data_type]])
                    header_rows[1].extend([x.wht for x in r_dict[data_type]])
                    header_rows[2].extend([x.val for x in r_dict[data_type]])
                    header_rows[3].extend([x.val for x in c_dict[data_type]])
                for row in header_rows:
                    csv_writer.writerow(row)
                logger.log(
                    20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
                # Save many FFs, each with their own parameter sets.
                ffs = opt.differentiate_ff(self.ff)
                logger.log(
                    20,
                    '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
                for ff in ffs:
                    ff.export_ff(lines=self.ff.lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    c_data = compare.data_by_type(data)
                    # r_dict is rebound to whatever trim_data returns, so
                    # later steps use that version of the reference data.
                    r_dict, c_data = compare.trim_data(r_dict, c_data)
                    ff.score = compare.compare_data(r_dict, c_data)
                    opt.pretty_ff_results(ff)
                    # Write the data rather than storing it in memory. For
                    # large parameter sets, this could consume GBs of memory
                    # otherwise!
                    row = []
                    for data_type in data_types:
                        row.extend([x.val for x in c_data[data_type]])
                    csv_writer.writerow(row)

            # Make sure we have derivative information. Used for NR.
            #
            # The derivatives are useful for checking up on the progress of the
            # optimization and for deciding which parameters to use in a
            # subsequent simplex optimization.
            #
            # Still need a way to do this with the restart file.
            opt.param_derivs(self.ff, ffs)

        # Calculate the Jacobian, residual vector, matrix A and vector b.
        # These aren't needed if you're only doing Newton-Raphson.
        if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
                self.do_svd:
            logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
            # Setup the residual vector: one weighted difference per data
            # point, ordered by data type.
            num_d = sum(len(r_dict[data_type]) for data_type in r_dict)
            resid = np.empty((num_d, 1), dtype=float)
            count = 0
            for data_type in data_types:
                for r, c in zip(r_dict[data_type], c_dict[data_type]):
                    resid[count, 0] = r.wht * (r.val - c.val)
                    count += 1
            logger.log(20,
                       '  -- Formed {} residual vector.'.format(resid.shape))
            # Setup the Jacobian.
            num_p = len(self.ff.params)
            # Maybe should be a part of the Jacobian function.
            jacob = np.empty((num_d, num_p), dtype=float)
            jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
            logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
            # A = J^T . J and b = J^T . r
            ma = jacob.T.dot(jacob)
            vb = jacob.T.dot(resid)
            # We need these for most optimization methods.
            logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # Start coming up with new parameter sets.
        if self.do_newton and not restart:
            logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
            # Relies on the parameter derivatives computed above, which are
            # not available when restarting.
            changes = do_newton(self.ff.params,
                                radii=self.newton_radii,
                                cutoffs=self.newton_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lstsq:
            logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
            changes = do_lstsq(ma,
                               vb,
                               radii=self.lstsq_radii,
                               cutoffs=self.lstsq_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lagrange:
            logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
            for factor in sorted(self.lagrange_factors):
                changes = do_lagrange(ma,
                                      vb,
                                      factor,
                                      radii=self.lagrange_radii,
                                      cutoffs=self.lagrange_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_levenberg:
            logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
            for factor in sorted(self.levenberg_factors):
                changes = do_levenberg(ma,
                                       vb,
                                       factor,
                                       radii=self.levenberg_radii,
                                       cutoffs=self.levenberg_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_svd:
            logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
            # J = U . s . VT
            mu, vs, mvt = return_svd(jacob)
            logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
            logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
            logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
            logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
            if self.svd_factors:
                changes = do_svd_w_thresholds(mu,
                                              vs,
                                              mvt,
                                              resid,
                                              self.svd_factors,
                                              radii=self.svd_radii,
                                              cutoffs=self.svd_cutoffs)
            else:
                changes = do_svd_wo_thresholds(mu,
                                               vs,
                                               mvt,
                                               resid,
                                               radii=self.svd_radii,
                                               cutoffs=self.svd_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        # Report how many trial FFs were generated.
        logger.log(
            20, '  -- Generated {} trial force field(s).'.format(
                len(self.new_ffs)))
        # If there are any trials, test them.
        if self.new_ffs:
            logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
            for ff in self.new_ffs:
                data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
                # Shouldn't need to zero anymore.
                c_data = compare.data_by_type(data)
                r_dict, c_data = compare.trim_data(r_dict, c_data)
                ff.score = compare.compare_data(r_dict, c_data)
                opt.pretty_ff_results(ff)
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Check for improvement.
            if self.new_ffs[0].score < self.ff.score:
                ff = self.new_ffs[0]
                logger.log(
                    20,
                    '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
                opt.pretty_ff_results(self.ff, level=20)
                opt.pretty_ff_results(ff, level=20)
                # Copy parameter derivatives from original FF to save time in
                # case we move onto simplex immediately after this.
                copy_derivs(self.ff, ff)
            else:
                ff = self.ff
        else:
            ff = self.ff
        ff.export_ff(ff.path)
        return ff
Пример #3
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)
        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        # Here we don't actually need the database connection/force field data.
        # We only need the score.
        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # I could store this object on self.ff to prevent garbage
            # collection. Would be nice if simplex was followed by gradient,
            # which needs that information, and if simplex yielded no
            # improvements. At most points in the optimization, this is probably
            # too infrequent for it to be worth the memory, but it might be nice
            # once the parameters are close to convergence.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data, zero=False)
            logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
        else:
            logger.log(15, '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
        ffs = opt.differentiate_ff(self.ff)
        for ff in ffs:
            ff.export_ff(lines=self.ff_lines)
            logger.log(20, '  -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(ff)
        if self.max_params and len(self.ff.params) > self.max_params:
            simp_params = reduce_num_simp_params(
                self.ff, ffs, max_params=self.max_params)
            self.new_ffs = reduce_num_simp_ffs(
                ffs, simp_params)
        else:
            self.new_ffs = ffs
        self.new_ffs = sorted(self.new_ffs + [self.ff], key=lambda x: x.score)
        wrapper = textwrap.TextWrapper(width=79)
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
                ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self.max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if self.do_weighted_reflection:
                    try:
                        inv_val = (
                            sum([x.params[i].value *
                                 (x.score - self.new_ffs[-1].score)
                                 for x in self.new_ffs[:-1]])
                            / 
                            sum([x.score - self.new_ffs[-1].score
                                 for x in self.new_ffs[:-1]]))
                    except ZeroDivisionError:
                        logger.warning(
                            'Attempted to divide by zero while calculating the '
                            'weighted simplex inversion point. All penalty '
                            'function scores for the trial force fields are '
                            'numerically equivalent.')
                        # Breaking should just exit the while loop. Should still
                        # give you the best force field determined thus far.
                        break
                else:
                    inv_val = (
                        sum([x.params[i].value for x in self.new_ffs[:-1]])
                        /
                        len(self.new_ffs[:-1]))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - self.new_ffs[-1].params[i].value)
            # Calculate score for inverted parameters.
            self.ff.export_ff(self.ff.path, params=inv_ff.params)
            data = calculate.main(self.args_ff)
            inv_ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(inv_ff)
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * self.new_ffs[-1].params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data, zero=False)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20, '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                for i in xrange(0, len(best_ff.params)):
                    if ref_ff.score > self.new_ffs[-1].score:
                        con_val = (
                            (inv_ff.params[i].value +
                             self.new_ffs[-1].params[i].value) / 2)
                    else:
                        con_val = (
                            (3 * inv_ff.params[i].value -
                             self.new_ffs[-1].params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data, zero=False)
                opt.pretty_ff_results(con_ff)
                if con_ff.score < self.new_ffs[-2].score:
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    for ff_num, ff in enumerate(self.new_ffs[1:]):
                        for i in xrange(0, len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data, zero=False)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(20, '  -- Contraction failed.')
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(20, '  -- {} cycles without change.'.format(
                        cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
                    current_cycle).rjust(79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
                    79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Пример #4
0
    def run(self, ref_data=None, restart=None):
        """
        Runs the gradient optimization.

        Ensure that the attributes in __init__ are set as you desire before
        using this function.

        Parameters
        ----------
        ref_data : optional
            Reference data. If None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.
        restart : str, optional
            Name of an existing central differentiation file inside
            `self.direc`. If given, the parameters are not differentiated or
            scored again, no new file is written, Newton-Raphson is skipped
            and this file is the one handed to `return_jacobian`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            The best scoring trial FF if it scores lower than `self.ff`.
            Otherwise `self.ff`.
        """
        # We need reference data if you didn't provide it.
        if ref_data is None:
            ref_data = opt.return_ref_data(self.args_ref)

        # We need the initial FF data.
        if self.ff.data is None:
            logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
            # Is opt.Optimizer.ff_lines used anymore?
            self.ff.export_ff()
            self.ff.data = calculate.main(self.args_ff)
            # Not 100% sure if this is necessary, but it certainly doesn't hurt.
            compare.correlate_energies(ref_data, self.ff.data)
        r_dict = compare.data_by_type(ref_data)
        c_dict = compare.data_by_type(self.ff.data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        if self.ff.score is None:
            # Already zeroed reference and correlated the energies.
            self.ff.score = compare.compare_data(r_dict, c_dict)
        # The data types are always visited in this one sorted order so that
        # the columns written to the CSV file and the rows of the residual
        # vector are built in the same sequence.
        data_types = sorted(r_dict)
        logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
        if restart:
            par_file = restart
            logger.log(20, '  -- Restarting gradient from central '
                       'differentiation file {}.'.format(par_file))
        else:
            # We need a file to hold the differentiated parameter data. It is
            # numbered one higher than the most recent par_diff_???.txt.
            par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
            if par_files:
                par_files.sort()
                # basename, unlike splitting on '/', does not depend on the
                # path separator used by the platform.
                most_recent_par_file = os.path.basename(par_files[-1])
                # Characters 9-11 of 'par_diff_NNN.txt' hold the number.
                most_recent_num = most_recent_par_file[9:12]
                num = int(most_recent_num) + 1
                par_file = 'par_diff_{:03d}.txt'.format(num)
            else:
                par_file = 'par_diff_001.txt'
            logger.log(20, '  -- Generating central differentiation '
                       'file {}.'.format(par_file))
            # The context manager closes the file even if one of the
            # calculations or comparisons below raises.
            with open(os.path.join(self.direc, par_file), 'w') as f:
                csv_writer = csv.writer(f)
                # Row 1 - Labels
                # Row 2 - Weights
                # Row 3 - Reference data values
                # Row 4 - Initial FF data values
                writerows = [[], [], [], []]
                for data_type in data_types:
                    writerows[0].extend([x.lbl for x in r_dict[data_type]])
                    writerows[1].extend([x.wht for x in r_dict[data_type]])
                    writerows[2].extend([x.val for x in r_dict[data_type]])
                    writerows[3].extend([x.val for x in c_dict[data_type]])
                for row in writerows:
                    csv_writer.writerow(row)
                logger.log(
                    20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
                # Save many FFs, each with their own parameter sets.
                ffs = opt.differentiate_ff(self.ff)
                logger.log(
                    20,
                    '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
                for ff in ffs:
                    ff.export_ff(lines=self.ff.lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    c_data = compare.data_by_type(data)
                    # NOTE(review): r_dict is rebound on every pass. If
                    # trim_data can drop reference entries here, later rows
                    # may not match the header rows written above - confirm.
                    r_dict, c_data = compare.trim_data(r_dict, c_data)
                    ff.score = compare.compare_data(r_dict, c_data)
                    opt.pretty_ff_results(ff)
                    # Write the data rather than storing it in memory. For
                    # large parameter sets, this could consume GBs of memory
                    # otherwise!
                    row = []
                    for data_type in data_types:
                        row.extend([x.val for x in c_data[data_type]])
                    csv_writer.writerow(row)

            # Make sure we have derivative information. Used for NR.
            #
            # The derivatives are useful for checking up on the progress of the
            # optimization and for deciding which parameters to use in a
            # subsequent simplex optimization.
            #
            # Still need a way to do this with the restart file.
            opt.param_derivs(self.ff, ffs)

        # Calculate the Jacobian, residual vector, matrix A and vector b.
        # These aren't needed if you're only doing Newton-Raphson.
        if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
                self.do_svd:
            logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
            # Setup the residual vector: one weighted difference per
            # reference data point, grouped by data type.
            num_d = 0
            for datatype in r_dict:
                num_d += len(r_dict[datatype])
            resid = np.empty((num_d, 1), dtype=float)
            count = 0
            for data_type in data_types:
                for r, c in zip(r_dict[data_type], c_dict[data_type]):
                    resid[count, 0] = r.wht * (r.val - c.val)
                    count += 1
            logger.log(
                20, '  -- Formed {} residual vector.'.format(resid.shape))
            # Setup the Jacobian.
            num_p = len(self.ff.params)
            # Maybe should be a part of the Jacobian function.
            jacob = np.empty((num_d, num_p), dtype=float)
            jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
            logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
            ma = jacob.T.dot(jacob)
            vb = jacob.T.dot(resid)
            # We need these for most optimization methods.
            logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # Start coming up with new parameter sets.
        # Newton-Raphson relies on the derivatives from opt.param_derivs,
        # which are only calculated when not restarting.
        if self.do_newton and not restart:
            logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
            changes = do_newton(self.ff.params,
                                radii=self.newton_radii,
                                cutoffs=self.newton_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lstsq:
            logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
            changes = do_lstsq(ma, vb,
                               radii=self.lstsq_radii,
                               cutoffs=self.lstsq_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        if self.do_lagrange:
            logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
            for factor in sorted(self.lagrange_factors):
                changes = do_lagrange(ma, vb, factor,
                                      radii=self.lagrange_radii,
                                      cutoffs=self.lagrange_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_levenberg:
            logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
            for factor in sorted(self.levenberg_factors):
                changes = do_levenberg(ma, vb, factor,
                                       radii=self.levenberg_radii,
                                       cutoffs=self.levenberg_cutoffs)
                cleanup(self.new_ffs, self.ff, changes)
        if self.do_svd:
            logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
            # J = U . s . VT
            mu, vs, mvt = return_svd(jacob)
            logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
            logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
            logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
            logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
            if self.svd_factors:
                changes = do_svd_w_thresholds(
                    mu, vs, mvt, resid, self.svd_factors,
                    radii=self.svd_radii,
                    cutoffs=self.svd_cutoffs)
            else:
                changes = do_svd_wo_thresholds(
                    mu, vs, mvt, resid,
                    radii=self.svd_radii,
                    cutoffs=self.svd_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
        # Report how many trial FFs were generated.
        logger.log(20, '  -- Generated {} trial force field(s).'.format(
                len(self.new_ffs)))
        # If there are any trials, test them.
        if self.new_ffs:
            logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
            for ff in self.new_ffs:
                data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
                # Shouldn't need to zero anymore.
                c_data = compare.data_by_type(data)
                r_dict, c_data = compare.trim_data(r_dict, c_data)
                ff.score = compare.compare_data(r_dict, c_data)
                opt.pretty_ff_results(ff)
            self.new_ffs = sorted(
                self.new_ffs, key=lambda x: x.score)
            # Check for improvement.
            if self.new_ffs[0].score < self.ff.score:
                ff = self.new_ffs[0]
                logger.log(
                    20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                        79, '~'))
                opt.pretty_ff_results(self.ff, level=20)
                opt.pretty_ff_results(ff, level=20)
                # Copy parameter derivatives from original FF to save time in
                # case we move onto simplex immediately after this.
                copy_derivs(self.ff, ff)
            else:
                ff = self.ff
        else:
            ff = self.ff
        return ff
Пример #5
0
 def run(self, ref_data=None):
     """
     Runs the gradient optimization.

     Parameters
     ----------
     ref_data : optional
         Reference data. If None, it is obtained from
         `opt.return_ref_data(self.args_ref)`.

     Returns
     -------
     The best scoring trial FF from `self.new_ffs` if it scores lower than
     `self.ff`. Otherwise `self.ff`.
     """
     # We need reference data if you didn't provide it.
     if ref_data is None:
         ref_data = opt.return_ref_data(self.args_ref)
     # We need the initial FF data.
     if self.ff.data is None:
         logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
         # Check whether this is efficient with the ff_lines.
         self.ff.export_ff()
         self.ff.data = calculate.main(self.args_ff)
         # compare.compare_data is not used for the score because the
         # zeroing of energies has already been done. Only the energies are
         # correlated here.
         compare.correlate_energies(ref_data, self.ff.data)
     if self.ff.score is None:
         # Already zeroed reference and correlated the energies.
         self.ff.score = compare.calculate_score(ref_data, self.ff.data)
         # Log the attribute that was just assigned (was `fscore`, which is
         # not set anywhere in this method).
         logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
     logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
     # We need a file to hold the differentiated parameter data. It is
     # numbered one higher than the most recent par_diff_???.txt.
     par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
     if par_files:
         par_files.sort()
         # basename, unlike splitting on '/', does not depend on the path
         # separator used by the platform.
         most_recent_par_file = os.path.basename(par_files[-1])
         # Characters 9-11 of 'par_diff_NNN.txt' hold the number.
         most_recent_num = most_recent_par_file[9:12]
         num = int(most_recent_num) + 1
         par_file = 'par_diff_{:03d}.txt'.format(num)
     else:
         par_file = 'par_diff_001.txt'
     # The context manager closes the file even if one of the calculations
     # or comparisons below raises.
     with open(os.path.join(self.direc, par_file), 'w') as f:
         csv_writer = csv.writer(f)
         # Row 1 - Labels
         # Row 2 - Weights
         # Row 3 - Reference data values
         # Row 4 - Initial FF data values
         csv_writer.writerow([x.lbl for x in ref_data])
         csv_writer.writerow([x.wht for x in ref_data])
         csv_writer.writerow([x.val for x in ref_data])
         csv_writer.writerow([x.val for x in self.ff.data])
         logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
         # Setup the residual vector: one weighted difference per reference
         # data point.
         # Perhaps move this closer to the Jacobian section.
         num_d = len(ref_data)
         resid = np.empty((num_d, 1), dtype=float)
         for i in range(num_d):
             resid[i, 0] = ref_data[i].wht * (
                 ref_data[i].val - self.ff.data[i].val)
         logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
         logger.log(
             20, '  -- Formed {} residual vector.'.format(resid.shape))
         # Save many FFs, each with their own parameter sets.
         ffs = opt.differentiate_ff(self.ff)
         logger.log(
             20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
         for ff in ffs:
             ff.export_ff(lines=self.ff.lines)
             logger.log(20, '  -- Calculating {}.'.format(ff))
             data = calculate.main(self.args_ff)
             compare.correlate_energies(ref_data, data)
             ff.score = compare.calculate_score(ref_data, data)
             opt.pretty_ff_results(ff)
             # Write the data rather than storing it in memory. For large
             # parameter sets, this could consume GBs of memory otherwise!
             csv_writer.writerow([x.val for x in data])
     # Calculate the Jacobian, residual vector, matrix A and vector b.
     # These aren't needed if you're only doing Newton-Raphson.
     if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
             self.do_svd:
         logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
         # Setup the Jacobian.
         num_p = len(self.ff.params)
         # Maybe should be a part of the Jacobian function.
         jacob = np.empty((num_d, num_p), dtype=float)
         jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
         ma = jacob.T.dot(jacob)
         vb = jacob.T.dot(resid)
         # We need these for most optimization methods.
         logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
         logger.log(5, 'A:\n{}'.format(ma))
         logger.log(5, 'b:\n{}'.format(vb))
     # Start coming up with new parameter sets.
     if self.do_newton:
         logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
         # Make sure we have derivative information.
         if self.ff.params[0].d1 is None:
             opt.param_derivs(self.ff, ffs)
         changes = do_newton(self.ff.params,
                             radii=self.newton_radii,
                             cutoffs=self.newton_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lstsq:
         logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
         changes = do_lstsq(ma, vb,
                            radii=self.lstsq_radii,
                            cutoffs=self.lstsq_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lagrange:
         logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
         for factor in sorted(self.lagrange_factors):
             changes = do_lagrange(ma, vb, factor,
                                   radii=self.lagrange_radii,
                                   cutoffs=self.lagrange_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_levenberg:
         logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
         for factor in sorted(self.levenberg_factors):
             changes = do_levenberg(ma, vb, factor,
                                    radii=self.levenberg_radii,
                                    cutoffs=self.levenberg_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_svd:
         logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
         mu, vs, mv = return_svd(ma)
         if self.svd_factors:
             changes = do_svd_w_thresholds(mu, vs, mv, vb, self.svd_factors,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
         else:
             changes = do_svd_wo_thresholds(mu, vs, mv, vb,
                                            radii=self.svd_radii,
                                            cutoffs=self.svd_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     # Report how many trial FFs were generated.
     logger.log(20, '  -- Generated {} trial force field(s).'.format(
             len(self.new_ffs)))
     # If there are any trials, test them.
     if self.new_ffs:
         logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
         for ff in self.new_ffs:
             data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
             ff.score = compare.compare_data(ref_data, data, zero=False)
             opt.pretty_ff_results(ff)
         self.new_ffs = sorted(
             self.new_ffs, key=lambda x: x.score)
         # Check for improvement.
         if self.new_ffs[0].score < self.ff.score:
             ff = self.new_ffs[0]
             logger.log(
                 20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                     79, '~'))
             opt.pretty_ff_results(self.ff, level=20)
             opt.pretty_ff_results(ff, level=20)
         else:
             ff = self.ff
     else:
         ff = self.ff
     return ff
Пример #6
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        Parameters
        ----------
        r_data : optional
            Reference data. If None, it is obtained from
            `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)
        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        # Here we don't actually need the database connection/force field data.
        # We only need the score.
        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # The data could be stored on self.ff to prevent garbage
            # collection. Would be nice if simplex was followed by gradient,
            # which needs that information, and if simplex yielded no
            # improvements. At most points in the optimization, this is probably
            # too infrequent for it to be worth the memory, but it might be nice
            # once the parameters are close to convergence.
            data = calculate.main(self.args_ff)
            self.ff.score = compare.compare_data(r_data, data)
            logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
        else:
            logger.log(15,
                       '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
        # These stay None unless the number of parameters has to be reduced.
        ff_rows = None
        ff_cols = None
        if self.max_params and len(self.ff.params) > self.max_params:
            if self.ff.params[0].d1:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Don't score so this really doesn't take much time.
                ffs = opt.differentiate_ff(self.ff, central=False)
            else:
                logger.log(15, '  -- Calculating new parameter derivatives.')
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    opt.pretty_ff_results(ff)
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
            params = select_simp_params_on_derivs(self.ff.params,
                                                  max_params=self.max_params)
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            # Reduce number of parameters.
            # Will need an option that's not MM3* specific.
            # NOTE(review): rows and columns are matched independently, so a
            # parameter whose row and column each belong to *different*
            # selected parameters is kept too - confirm this is intended.
            ff_rows = [x.mm3_row for x in params]
            ff_cols = [x.mm3_col for x in params]
            for ff in self.new_ffs:
                ff.params = [
                    param for param in ff.params
                    if param.mm3_row in ff_rows and param.mm3_col in ff_cols]
        else:
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
        # Add a copy of the original FF to the trial FFs. When the parameters
        # were reduced above, the copy is reduced with the same filter, applied
        # to its own (deep copied) parameters so that it neither shares
        # parameter objects with nor picks up the altered values of any of
        # the differentiated FFs. Without reduction it keeps all parameters.
        ff_copy = copy.deepcopy(self.ff)
        if ff_rows is not None:
            ff_copy.params = [
                param for param in ff_copy.params
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols]
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        wrapper = textwrap.TextWrapper(width=79)
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(
            20,
            wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score)
                                              for x in self.new_ffs))))
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)
        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        # Defined up front so that it exists even if no cycle is executed.
        best_ff = self.new_ffs[0]
        while current_cycle < self.max_cycles \
                and cycles_wo_change < self.max_cycles_wo_change:
            current_cycle += 1
            last_best = self.new_ffs[0].score
            best_ff = self.new_ffs[0]
            # self.new_ffs is sorted by score, so the last FF has the highest
            # score. It is the one that may be replaced during this cycle.
            worst_ff = self.new_ffs[-1]
            other_ffs = self.new_ffs[:-1]
            logger.log(
                20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(best_ff.params)
            scores_equivalent = False
            for i in range(len(best_ff.params)):
                if self.do_weighted_reflection:
                    # Average of all but the worst FF, weighted by how much
                    # each score differs from the worst score.
                    try:
                        inv_val = (
                            sum([x.params[i].value *
                                 (x.score - worst_ff.score)
                                 for x in other_ffs])
                            /
                            sum([x.score - worst_ff.score
                                 for x in other_ffs]))
                    except ZeroDivisionError:
                        logger.warning(
                            'Attempted to divide by zero while calculating the '
                            'weighted simplex inversion point. All penalty '
                            'function scores for the trial force fields are '
                            'numerically equivalent.')
                        # This break only leaves the parameter loop. The flag
                        # makes sure the while loop is left as well.
                        scores_equivalent = True
                        break
                else:
                    # Plain average of all but the worst FF.
                    inv_val = (
                        sum([x.params[i].value for x in other_ffs]) /
                        len(other_ffs))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (
                    2 * inv_val - worst_ff.params[i].value)
            if scores_equivalent:
                # Exit the while loop. Still gives you the best force field
                # determined thus far.
                break
            # Calculate score for inverted parameters.
            self.ff.export_ff(self.ff.path, params=inv_ff.params)
            data = calculate.main(self.args_ff)
            inv_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(inv_ff)
            # Calculate score for reflected parameters.
            self.ff.export_ff(self.ff.path, params=ref_ff.params)
            data = calculate.main(self.args_ff)
            ref_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < self.new_ffs[0].score:
                # Reflection beat the best FF, so try going even further.
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(best_ff.params)
                for i in range(len(self.new_ffs[0].params)):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * worst_ff.params[i].value)
                self.ff.export_ff(self.ff.path, exp_ff.params)
                data = calculate.main(self.args_ff)
                exp_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20,
                        '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(best_ff.params)
                # Which contraction formula is used depends only on the
                # scores, so it is decided once for all parameters.
                ref_is_worse = ref_ff.score > worst_ff.score
                for i in range(len(best_ff.params)):
                    if ref_is_worse:
                        con_val = ((inv_ff.params[i].value +
                                    worst_ff.params[i].value) / 2)
                    else:
                        con_val = ((3 * inv_ff.params[i].value -
                                    worst_ff.params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(self.ff.path, params=con_ff.params)
                data = calculate.main(self.args_ff)
                con_ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(con_ff)
                if con_ff.score < self.new_ffs[-2].score:
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF except the best halfway towards the best
                    # and score it again.
                    for ff in self.new_ffs[1:]:
                        for i in range(len(best_ff.params)):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(self.ff.path, params=ff.params)
                        data = calculate.main(self.args_ff)
                        ff.score = compare.compare_data(r_data, data)
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(20, '  -- Contraction failed.')
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            if self.new_ffs[0].score < last_best:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(
                    20,
                    '  -- {} cycles without change.'.format(cycles_wo_change))
            best_ff = self.new_ffs[0]
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(
                20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
        if best_ff.score < self.ff.score:
            logger.log(
                20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(
                20,
                '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Пример #7
0
    def run(self, r_data=None):
        """
        Once all attributes are setup as you so desire, run this method to
        optimize the parameters.

        The initial FF is scored if it has no score yet, one forward
        differentiated FF is built per parameter (or per selected parameter
        when there are more than `self.max_params`), and then reflection,
        expansion and contraction cycles are repeated until either
        `self.max_cycles` is reached or too many cycles pass without
        improvement. The best FF is exported before it is returned.

        Parameters
        ----------
        r_data : optional
            Reference data. When `None`, it is obtained from
            `opt.return_ref_data(self.args_ref)`.

        Returns
        -------
        `datatypes.FF` (or subclass)
            Contains the best parameters.

        Raises
        ------
        opt.OptError
            If weighted reflection is enabled and the score differences
            relative to the worst FF sum to zero.
        """
        if r_data is None:
            r_data = opt.return_ref_data(self.args_ref)

        def score_data(data):
            # Single place for the scoring sequence used by every trial FF.
            # The reference dictionary is rebuilt on every call so that each
            # trim starts from the untrimmed reference data.
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            return compare.compare_data(r_dict, c_dict)

        if self.ff.score is None:
            logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
            self.ff.export_ff()
            # Could store data on self.ff.data if we wanted. Not necessary for
            # simplex. If simplex yielded no improvements, it would return this
            # FF, and then we might want the data such that we don't have to
            # recalculate it in gradient. Let's hope simplex generally yields
            # improvements.
            self.ff.score = score_data(calculate.main(self.args_ff))
        else:
            logger.log(20,
                       '  -- Reused existing score and data for initial FF.')

        logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
        logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
        opt.pretty_ff_results(self.ff, level=20)

        # Here's what we do if there are too many parameters.
        if self.max_params and len(self.ff.params) > self.max_params:
            logger.log(20, '  -- More parameters than the maximum allowed.')
            logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
            logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
            # Here we select the parameters that have the lowest 2nd
            # derivatives.

            # Checking only the first parameter could fail when simplex finds
            # improvements but restores other parameters, so every parameter
            # is checked for a missing derivative.
            if any(x.d1 is None for x in self.ff.params):
                logger.log(15, '  -- Calculating new parameter derivatives.')
                # Do central differentiation so we can calculate derivatives.
                # Another option would be to write code to determine
                # derivatives only from forward differentiation.
                ffs = opt.differentiate_ff(self.ff, central=True)
                # We have to score to get the derivatives.
                for ff in ffs:
                    ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                    logger.log(20, '  -- Calculating {}.'.format(ff))
                    ff.score = score_data(calculate.main(self.args_ff))
                    opt.pretty_ff_results(ff)
                # Add the derivatives to your original FF.
                opt.param_derivs(self.ff, ffs)
                # Only keep the forward differentiated FFs.
                ffs = opt.extract_forward(ffs)
                logger.log(
                    5, '  -- Keeping {} forward differentiated '
                    'FFs.'.format(len(ffs)))
            else:
                logger.log(15, '  -- Reusing existing parameter derivatives.')
                # Differentiate all parameters forward. Yes, I know this is
                # counter-intuitive because we are going to only use subset of
                # the forward differentiated FFs. However, this is very
                # computationally inexpensive because we're not scoring them
                # now. We will remove the forward differentiated FFs we don't
                # want before scoring.
                ffs = opt.differentiate_ff(self.ff, central=False)

            # This sorts the parameters based upon their 2nd derivative.
            # It keeps the ones with lowest 2nd derivatives.

            # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
            params = select_simp_params_on_derivs(self.ff.params,
                                                  max_params=self.max_params)
            # From the entire list of forward differentiated FFs, pick
            # out the ones that have the lowest 2nd derivatives.
            self.new_ffs = opt.extract_ff_by_params(ffs, params)
            logger.log(1,
                       '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))

            # Reduce number of parameters.
            # Will need an option that's not MM3* specific in the future.
            # A parameter is identified by its (row, column) pair. Testing the
            # row and the column independently would also keep parameters
            # whose row matches one selected parameter and whose column
            # matches another.
            selected = {(x.mm3_row, x.mm3_col) for x in params}
            for ff in self.new_ffs:
                ff.params = [p for p in ff.params
                             if (p.mm3_row, p.mm3_col) in selected]
            # Make a copy of your original FF that has less parameters. The
            # copy's own (deep-copied) parameters are filtered; using the loop
            # variable left over from above would hand it the parameter
            # objects of the last differentiated FF instead.
            ff_copy = copy.deepcopy(self.ff)
            ff_copy.params = [p for p in ff_copy.params
                              if (p.mm3_row, p.mm3_col) in selected]
        else:
            # In this case it's simple. Just forward differentiate each
            # parameter.
            self.new_ffs = opt.differentiate_ff(self.ff, central=False)
            logger.log(1,
                       '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
            # Still make that FF copy.
            ff_copy = copy.deepcopy(self.ff)

        # Double check and make sure they're all scored.
        for ff in self.new_ffs:
            if ff.score is None:
                ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                ff.score = score_data(calculate.main(self.args_ff))
                opt.pretty_ff_results(ff)

        # Add your copy of the original FF to the forward differentiated FFs.
        self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
        # Allow 3 cycles w/o change for each parameter present. Remember that
        # the initial FF was added here, hence the minus one.
        self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
        wrapper = textwrap.TextWrapper(width=79)
        # Shows all FFs parameters.
        opt.pretty_ff_params(self.new_ffs)

        # Start the simplex cycles.
        current_cycle = 0
        cycles_wo_change = 0
        while (current_cycle < self.max_cycles
               and cycles_wo_change < self._max_cycles_wo_change):
            current_cycle += 1

            # Save the last best in case some accidental sort goes on.
            # Plus it makes reading the code a little easier.
            last_best_ff = copy.deepcopy(self.new_ffs[0])
            logger.log(
                20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))
            logger.log(20, 'ORDERED FF SCORES:')
            logger.log(
                20,
                wrapper.fill(' '.join('{:15.4f}'.format(x.score)
                                      for x in self.new_ffs)))

            # self.new_ffs is sorted by score, so the last entry is the worst.
            # Neither it nor the remaining entries are replaced until the
            # outcome of this cycle is decided further down.
            worst_ff = self.new_ffs[-1]
            others = self.new_ffs[:-1]
            n_params = len(last_best_ff.params)

            inv_ff = self.ff.__class__()
            if self.do_weighted_reflection:
                inv_ff.method = 'WEIGHTED INVERSION'
            else:
                inv_ff.method = 'INVERSION'
            inv_ff.params = copy.deepcopy(last_best_ff.params)
            ref_ff = self.ff.__class__()
            ref_ff.method = 'REFLECTION'
            ref_ff.params = copy.deepcopy(last_best_ff.params)
            # Need score difference sum for weighted inversion.
            # Calculate this value before going into loop.
            if self.do_weighted_reflection:
                score_diff_sum = sum(x.score - worst_ff.score for x in others)
                if score_diff_sum == 0.:
                    logger.warning('No difference between force field scores. '
                                   'Exiting simplex.')
                    # We want to raise opt.OptError such that
                    # opt.catch_run_errors will write the best FF obtained thus
                    # far.
                    raise opt.OptError(
                        'No difference between force field scores. '
                        'Exiting simplex.')
            for i in range(n_params):
                if self.do_weighted_reflection:
                    # Average weighted by how much better each FF scores than
                    # the worst one.
                    inv_val = (
                        sum(x.params[i].value * (x.score - worst_ff.score)
                            for x in others) / score_diff_sum)
                else:
                    # Plain average over every FF except the worst.
                    inv_val = (sum(x.params[i].value for x in others) /
                               len(others))
                inv_ff.params[i].value = inv_val
                ref_ff.params[i].value = (2 * inv_val -
                                          worst_ff.params[i].value)
            # The inversion point does not need to be scored.
            # Calculate score for reflected parameters.
            ref_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
            ref_ff.score = score_data(calculate.main(self.args_ff))
            opt.pretty_ff_results(ref_ff)
            if ref_ff.score < last_best_ff.score:
                logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
                exp_ff = self.ff.__class__()
                exp_ff.method = 'EXPANSION'
                exp_ff.params = copy.deepcopy(last_best_ff.params)
                for i in range(n_params):
                    exp_ff.params[i].value = (
                        3 * inv_ff.params[i].value -
                        2 * worst_ff.params[i].value)
                exp_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
                exp_ff.score = score_data(calculate.main(self.args_ff))
                opt.pretty_ff_results(exp_ff)
                if exp_ff.score < ref_ff.score:
                    self.new_ffs[-1] = exp_ff
                    logger.log(
                        20, '  -- Expansion succeeded. Keeping expanded '
                        'parameters.')
                else:
                    self.new_ffs[-1] = ref_ff
                    logger.log(
                        20,
                        '  -- Expansion failed. Keeping reflected parameters.')
            elif ref_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Keeping reflected parameters.')
                self.new_ffs[-1] = ref_ff
            else:
                logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
                con_ff = self.ff.__class__()
                con_ff.method = 'CONTRACTION'
                con_ff.params = copy.deepcopy(last_best_ff.params)
                # The scores do not change while the contracted values are
                # computed, so the direction is decided once. When the
                # reflection is worse than the worst FF, contract towards the
                # worst FF; otherwise contract on the reflected side.
                contract_inside = ref_ff.score > worst_ff.score
                for i in range(n_params):
                    if contract_inside:
                        con_val = ((inv_ff.params[i].value +
                                    worst_ff.params[i].value) / 2)
                    else:
                        con_val = ((3 * inv_ff.params[i].value -
                                    worst_ff.params[i].value) / 2)
                    con_ff.params[i].value = con_val
                self.ff.export_ff(params=con_ff.params)
                con_ff.score = score_data(calculate.main(self.args_ff))
                opt.pretty_ff_results(con_ff)
                # Comparing against the second worst FF (rather than the
                # worst) was done to reflect the 1998 Q2MM publication.
                if con_ff.score < self.new_ffs[-2].score:
                    logger.log(20, '  -- Contraction succeeded.')
                    self.new_ffs[-1] = con_ff
                elif self.do_massive_contraction:
                    logger.log(
                        20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                    # Move every FF except the best halfway towards the best
                    # one and rescore it.
                    for ff in self.new_ffs[1:]:
                        for i in range(n_params):
                            ff.params[i].value = (
                                (ff.params[i].value +
                                 self.new_ffs[0].params[i].value) / 2)
                        self.ff.export_ff(params=ff.params)
                        ff.score = score_data(calculate.main(self.args_ff))
                        ff.method += ' MC'
                        opt.pretty_ff_results(ff)
                else:
                    logger.log(
                        20, '  -- Contraction failed. Keeping parmaeters '
                        'anyway.')
                    self.new_ffs[-1] = con_ff
            self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
            # Keep track of the number of cycles without change. If there's
            # improvement, reset the counter.
            if self.new_ffs[0].score < last_best_ff.score:
                cycles_wo_change = 0
            else:
                cycles_wo_change += 1
                logger.log(
                    20, '  -- {} cycles without improvement out of {} '
                    'allowed.'.format(cycles_wo_change,
                                      self._max_cycles_wo_change))
            logger.log(20, 'BEST:')
            opt.pretty_ff_results(self.new_ffs[0], level=20)
            logger.log(
                20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust(
                    79, '~'))

        # This sort is likely unnecessary because it should be done at the end
        # of the last loop cycle, but I put it here just in case.
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        best_ff = self.new_ffs[0]
        if best_ff.score < self.ff.score:
            logger.log(
                20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            best_ff = restore_simp_ff(best_ff, self.ff)
        else:
            logger.log(
                20,
                '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
            # This restores the initial parameters, so no need to use
            # restore_simp_ff here.
            best_ff = self.ff
        opt.pretty_ff_results(self.ff, level=20)
        opt.pretty_ff_results(best_ff, level=20)
        logger.log(20, '  -- Writing best force field from simplex.')
        best_ff.export_ff(best_ff.path)
        return best_ff
Пример #8
0
 def run(self, ref_data=None, restart=None):
     """
     Run one round of gradient-based optimization.

     Gathers the initial FF data and score when missing, writes (or reuses)
     a central differentiation file, builds the Jacobian and residual
     vector when a method needs them, runs each enabled method, then scores
     the trial FFs collected in `self.new_ffs`.

     Parameters
     ----------
     ref_data : optional
         Reference data. When `None`, it is obtained from
         `opt.return_ref_data(self.args_ref)`.
     restart : optional
         Name of an existing central differentiation file inside
         `self.direc`. When given, no parameters are differentiated or
         scored, that file is used for the Jacobian, and Newton-Raphson is
         skipped.

     Returns
     -------
     The best scoring trial FF when it scores lower than `self.ff`,
     otherwise `self.ff`.
     """
     # We need reference data if you didn't provide it.
     if ref_data is None:
         ref_data = opt.return_ref_data(self.args_ref)
     # We need the initial FF data.
     if self.ff.data is None:
         logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
         # Check whether this is efficient with the ff_lines.
         self.ff.export_ff()
         self.ff.data = calculate.main(self.args_ff)
         # compare.compare_data is not used here because the zeroing of
         # energies has already been done. Only correlate the energies.
         compare.correlate_energies(ref_data, self.ff.data)
     if self.ff.score is None:
         # Already zeroed reference and correlated the energies.
         self.ff.score = compare.calculate_score(ref_data, self.ff.data)
         logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
     logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
     # We need a file to hold the differentiated parameter data.
     logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
     if restart:
         par_file = restart
         logger.log(
             20, '  -- Restarting gradient from central '
             'differentiation file {}.'.format(par_file))
     else:
         par_files = sorted(
             glob.glob(os.path.join(self.direc, 'par_diff_???.txt')))
         if par_files:
             # Strip the directory in a platform independent way, then read
             # the three digits that follow the 'par_diff_' prefix.
             most_recent_par_file = os.path.basename(par_files[-1])
             num = int(most_recent_par_file[9:12]) + 1
             par_file = 'par_diff_{:03d}.txt'.format(num)
         else:
             par_file = 'par_diff_001.txt'
         logger.log(
             20, '  -- Generating central differentiation '
             'file {}.'.format(par_file))
         # The context manager closes the file even if scoring one of the
         # differentiated FFs raises.
         with open(os.path.join(self.direc, par_file), 'w') as f:
             csv_writer = csv.writer(f)
             # Row 1 - Labels
             # Row 2 - Weights
             # Row 3 - Reference data values
             # Row 4 - Initial FF data values
             csv_writer.writerow([x.lbl for x in ref_data])
             csv_writer.writerow([x.wht for x in ref_data])
             csv_writer.writerow([x.val for x in ref_data])
             csv_writer.writerow([x.val for x in self.ff.data])
             logger.log(
                 20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
             # Save many FFs, each with their own parameter sets.
             ffs = opt.differentiate_ff(self.ff)
             logger.log(
                 20,
                 '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
             for ff in ffs:
                 ff.export_ff(lines=self.ff.lines)
                 logger.log(20, '  -- Calculating {}.'.format(ff))
                 data = calculate.main(self.args_ff)
                 compare.correlate_energies(ref_data, data)
                 ff.score = compare.calculate_score(ref_data, data)
                 opt.pretty_ff_results(ff)
                 # Write the data rather than storing it in memory. For
                 # large parameter sets, this could consume GBs of memory
                 # otherwise!
                 csv_writer.writerow([x.val for x in data])
     # Calculate the Jacobian, residual vector, matrix A and vector b.
     # These aren't needed if you're only doing Newton-Raphson.
     if (self.do_lstsq or self.do_lagrange or self.do_levenberg
             or self.do_svd):
         logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
         # Setup the residual vector: weighted difference between the
         # reference value and the initial FF value of each data point.
         num_d = len(ref_data)
         resid = np.empty((num_d, 1), dtype=float)
         for i in range(num_d):
             resid[i, 0] = ref_data[i].wht * (ref_data[i].val -
                                              self.ff.data[i].val)
         logger.log(20,
                    '  -- Formed {} residual vector.'.format(resid.shape))
         # Setup the Jacobian.
         num_p = len(self.ff.params)
         # Maybe should be a part of the Jacobian function.
         jacob = np.empty((num_d, num_p), dtype=float)
         jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
         logger.log(20, '  -- Formed {} Jacobian.'.format(jacob.shape))
         ma = jacob.T.dot(jacob)
         vb = jacob.T.dot(resid)
         # We need these for most optimization methods.
         logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
     # Start coming up with new parameter sets. Each method's changes are
     # passed to cleanup together with self.new_ffs and self.ff.
     # NOTE(review): cleanup presumably appends trial FFs to self.new_ffs;
     # confirm against its definition.
     if self.do_newton and not restart:
         logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
         # Make sure we have derivative information.
         if self.ff.params[0].d1 is None:
             opt.param_derivs(self.ff, ffs)
         changes = do_newton(self.ff.params,
                             radii=self.newton_radii,
                             cutoffs=self.newton_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lstsq:
         logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
         changes = do_lstsq(ma,
                            vb,
                            radii=self.lstsq_radii,
                            cutoffs=self.lstsq_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     if self.do_lagrange:
         logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
         for factor in sorted(self.lagrange_factors):
             changes = do_lagrange(ma,
                                   vb,
                                   factor,
                                   radii=self.lagrange_radii,
                                   cutoffs=self.lagrange_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_levenberg:
         logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
         for factor in sorted(self.levenberg_factors):
             changes = do_levenberg(ma,
                                    vb,
                                    factor,
                                    radii=self.levenberg_radii,
                                    cutoffs=self.levenberg_cutoffs)
             cleanup(self.new_ffs, self.ff, changes)
     if self.do_svd:
         logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
         # J = U . s . VT
         mu, vs, mvt = return_svd(jacob)
         logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
         logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
         logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
         logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
         if self.svd_factors:
             changes = do_svd_w_thresholds(mu,
                                           vs,
                                           mvt,
                                           resid,
                                           self.svd_factors,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
         else:
             changes = do_svd_wo_thresholds(mu,
                                            vs,
                                            mvt,
                                            resid,
                                            radii=self.svd_radii,
                                            cutoffs=self.svd_cutoffs)
         cleanup(self.new_ffs, self.ff, changes)
     # Report how many trial FFs were generated.
     logger.log(
         20, '  -- Generated {} trial force field(s).'.format(
             len(self.new_ffs)))
     # Without any trials the starting FF is the result.
     if not self.new_ffs:
         return self.ff
     logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
     for ff in self.new_ffs:
         data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
         # Shouldn't need to zero anymore.
         ff.score = compare.compare_data(ref_data, data)
         opt.pretty_ff_results(ff)
     self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
     # Check for improvement.
     if self.new_ffs[0].score < self.ff.score:
         ff = self.new_ffs[0]
         logger.log(
             20,
             '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
         opt.pretty_ff_results(self.ff, level=20)
         opt.pretty_ff_results(ff, level=20)
         # Copy parameter derivatives from original FF to save time in
         # case we move onto simplex immediately after this.
         copy_derivs(self.ff, ff)
         return ff
     return self.ff