def run(self, r_data=None):
    """
    Once all attributes are set up as you so desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)
    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # Could store data on self.ff.data if we wanted. Not necessary for
        # simplex. If simplex yielded no improvements, it would return this
        # FF, and then we might want the data such that we don't have to
        # recalculate it in gradient. Let's hope simplex generally yields
        # improvements.
        data = calculate.main(self.args_ff)
        self.ff.score = compare.compare_data(r_data, data)
    else:
        logger.log(20, ' -- Reused existing score and data for initial FF.')
    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)
    if self.max_params and len(self.ff.params) > self.max_params:
        logger.log(20, ' -- More parameters than the maximum allowed.')
        logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
        logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
        # Here we select the parameters that have the lowest 2nd
        # derivatives.
        # THIS IS SCHEDULED FOR CHANGING. THIS IS ACTUALLY NOT A GOOD
        # CRITERION FOR PARAMETER SELECTION.
        if self.ff.params[0].d1:
            logger.log(15, ' -- Reusing existing parameter derivatives.')
            # Differentiate all parameters forward. Yes, I know this is
            # counter-intuitive because we are going to use only a subset
            # of the forward differentiated FFs. However, this is very
            # computationally inexpensive because we're not scoring them
            # now. We will remove the forward differentiated FFs we don't
            # want before scoring.
            ffs = opt.differentiate_ff(self.ff, central=False)
        else:
            logger.log(15, ' -- Calculating new parameter derivatives.')
            # Do central differentiation so we can calculate derivatives.
            # Another option would be to write code to determine
            # derivatives only from forward differentiation.
            ffs = opt.differentiate_ff(self.ff, central=True)
            # We have to score to get the derivatives.
            for ff in ffs:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, ' -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
            # Add the derivatives to your original FF.
            opt.param_derivs(self.ff, ffs)
            # Only keep the forward differentiated FFs.
            ffs = opt.extract_forward(ffs)
            logger.log(5, ' -- Keeping {} forward differentiated '
                       'FFs.'.format(len(ffs)))
        # This sorts the parameters based upon their 2nd derivative.
        # It keeps the ones with lowest 2nd derivatives.
        # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
        params = select_simp_params_on_derivs(
            self.ff.params, max_params=self.max_params)
        # From the entire list of forward differentiated FFs, pick
        # out the ones that have the lowest 2nd derivatives.
        self.new_ffs = opt.extract_ff_by_params(ffs, params)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Reduce number of parameters.
        # Will need an option that's not MM3* specific in the future.
        ff_rows = [x.mm3_row for x in params]
        ff_cols = [x.mm3_col for x in params]
        for ff in self.new_ffs:
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff.params = new_params
        # Make a copy of your original FF that has fewer parameters.
        ff_copy = copy.deepcopy(self.ff)
        new_params = []
        for param in ff.params:
            if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                new_params.append(param)
        ff_copy.params = new_params
    else:
        # In this case it's simple. Just forward differentiate each
        # parameter.
        self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Still make that FF copy.
        ff_copy = copy.deepcopy(self.ff)
    # Double check and make sure they're all scored.
    for ff in self.new_ffs:
        if ff.score is None:
            ff.export_ff(lines=self.ff_lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ff)
    # Add your copy of the original FF to the forward differentiated FFs.
    self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
    # Allow 3 cycles w/o change for each parameter present. Remember that
    # the initial FF was added here, hence the minus one.
    self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
    wrapper = textwrap.TextWrapper(width=79)
    # Shows all FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)
    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self._max_cycles_wo_change:
        current_cycle += 1
        last_best = self.new_ffs[0].score
        best_ff = self.new_ffs[0]
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
            ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
        # !!! FOR TESTING !!!
        # Write the best and worst FFs to some other directory. Then
        # write the worst FF to the optimization working directory. Then
        # raise opt.OptError. The worst FF should be overwritten by
        # the best FF afterwards.
        # if current_cycle == 5:
        #     self.new_ffs[-1].export_ff(
        #         path='ref_methanol_flds/mm3_worst.fld',
        #         lines=self.ff.lines)
        #     self.new_ffs[0].export_ff(
        #         path='ref_methanol_flds/mm3_best.fld',
        #         lines=self.ff.lines)
        #     self.new_ffs[-1].export_ff(
        #         path='ref_methanol/mm3.fld',
        #         lines=self.ff.lines)
        #     raise opt.OptError
        # !!! END TESTING !!!
        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(best_ff.params)
        # Need the score difference sum for weighted inversion.
        # Calculate this value before going into the loop.
        if self.do_weighted_reflection:
            # If zero, should break.
            score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                  for x in self.new_ffs[:-1]])
            if score_diff_sum == 0.:
                logger.warning(
                    'No difference between force field scores. '
                    'Exiting simplex.')
                # We want to raise opt.OptError such that
                # opt.catch_run_errors will write the best FF obtained
                # thus far.
                raise opt.OptError(
                    'No difference between force field scores. '
                    'Exiting simplex.')
        for i in xrange(0, len(best_ff.params)):
            if self.do_weighted_reflection:
                inv_val = (
                    sum([x.params[i].value *
                         (x.score - self.new_ffs[-1].score)
                         for x in self.new_ffs[:-1]])
                    / score_diff_sum)
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # The inversion point does not need to be scored.
        # Calculate score for reflected parameters.
        self.ff.export_ff(self.ff.path, params=ref_ff.params)
        data = calculate.main(self.args_ff)
        ref_ff.score = compare.compare_data(r_data, data)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < self.new_ffs[0].score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(self.new_ffs[0].params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            self.ff.export_ff(self.ff.path, exp_ff.params)
            data = calculate.main(self.args_ff)
            exp_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(
                    20, ' -- Expansion succeeded. Keeping expanded '
                    'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(
                    20, ' -- Expansion failed. Keeping reflected '
                    'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, ' -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(self.ff.path, params=con_ff.params)
            data = calculate.main(self.args_ff)
            con_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(con_ff)
            # This change was made to reflect the 1998 Q2MM publication.
            # if con_ff.score < self.new_ffs[-1].score:
            if con_ff.score < self.new_ffs[-2].score:
                logger.log(20, ' -- Contraction succeeded.')
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in xrange(0, len(best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(self.ff.path, params=ff.params)
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(
                    20, ' -- Contraction failed. Keeping parameters '
                    'anyway.')
                self.new_ffs[-1] = con_ff
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        if self.new_ffs[0].score < last_best:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(20, ' -- {} cycles without improvement out of {} '
                       'allowed.'.format(
                           cycles_wo_change, self._max_cycles_wo_change))
        best_ff = self.new_ffs[0]
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
    if best_ff.score < self.ff.score:
        logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
            79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
            79, '~'))
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, ' -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
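# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the source): the vertex arithmetic that
# run() applies above, stripped of all FF bookkeeping. `vertices` and
# `scores` are hypothetical stand-ins for the sorted self.new_ffs list (best
# first, worst last); the formulas mirror the weighted inversion, reflection,
# expansion, and both contraction updates used in the simplex cycles.
import numpy as np

def simplex_candidates(vertices, scores, weighted=True):
    """vertices: (n, num_p) array; scores: (n,) array, best first."""
    worst = vertices[-1]
    others, other_scores = vertices[:-1], scores[:-1]
    if weighted:
        # Weighted inversion point: vertices that beat the worst score by
        # more contribute more to the centroid.
        diffs = other_scores - scores[-1]
        inv = (others * diffs[:, None]).sum(axis=0) / diffs.sum()
    else:
        inv = others.mean(axis=0)
    return {
        'reflection': 2 * inv - worst,
        'expansion': 3 * inv - 2 * worst,
        'inner_contraction': (inv + worst) / 2,      # reflection worse than worst
        'outer_contraction': (3 * inv - worst) / 2,  # reflection beat the worst
    }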
def run(self, ref_data=None, restart=None):
    """
    Runs the gradient optimization.

    Ensure that the attributes in __init__ are set as you desire before
    using this function.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    # We need reference data if you didn't provide it.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)
    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Is opt.Optimizer.ff_lines used anymore?
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # Not 100% sure if this is necessary, but it certainly doesn't
        # hurt.
        compare.correlate_energies(ref_data, self.ff.data)
    r_dict = compare.data_by_type(ref_data)
    c_dict = compare.data_by_type(self.ff.data)
    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
    if self.ff.score is None:
        # Already zeroed reference and correlated the energies.
        self.ff.score = compare.compare_data(r_dict, c_dict)
    data_types = []
    for typ in r_dict:
        data_types.append(typ)
    data_types.sort()
    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)
    logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
    if restart:
        par_file = restart
        logger.log(
            20, ' -- Restarting gradient from central '
            'differentiation file {}.'.format(par_file))
    else:
        # We need a file to hold the differentiated parameter data.
        par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
        if par_files:
            par_files.sort()
            most_recent_par_file = par_files[-1]
            most_recent_par_file = most_recent_par_file.split('/')[-1]
            most_recent_num = most_recent_par_file[9:12]
            num = int(most_recent_num) + 1
            par_file = 'par_diff_{:03d}.txt'.format(num)
        else:
            par_file = 'par_diff_001.txt'
        logger.log(
            20, ' -- Generating central differentiation '
            'file {}.'.format(par_file))
        f = open(os.path.join(self.direc, par_file), 'w')
        csv_writer = csv.writer(f)
        # Row 1 - Labels
        # Row 2 - Weights
        # Row 3 - Reference data values
        # Row 4 - Initial FF data values
        ## Deprecated -TR
        # csv_writer.writerow([x.lbl for x in ref_data])
        # csv_writer.writerow([x.wht for x in ref_data])
        # csv_writer.writerow([x.val for x in ref_data])
        # csv_writer.writerow([x.val for x in self.ff.data])
        writerows = [[], [], [], []]
        for data_type in data_types:
            writerows[0].extend([x.lbl for x in r_dict[data_type]])
            writerows[1].extend([x.wht for x in r_dict[data_type]])
            writerows[2].extend([x.val for x in r_dict[data_type]])
            writerows[3].extend([x.val for x in c_dict[data_type]])
        for row in writerows:
            csv_writer.writerow(row)
        logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
        # Save many FFs, each with their own parameter sets.
        ffs = opt.differentiate_ff(self.ff)
        logger.log(
            20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
        for ff in ffs:
            ff.export_ff(lines=self.ff.lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            # Deprecated
            # ff.score = compare.compare_data(ref_data, data)
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
            # Write the data rather than storing it in memory. For large
            # parameter sets, this could consume GBs of memory otherwise!
            # csv_writer.writerow([x.val for x in data])
            row = []
            for data_type in data_types:
                row.extend([x.val for x in c_data[data_type]])
            csv_writer.writerow(row)
        f.close()
        # Make sure we have derivative information. Used for NR.
        #
        # The derivatives are useful for checking up on the progress of
        # the optimization and for deciding which parameters to use in a
        # subsequent simplex optimization.
        #
        # Still need a way to do this with the restart file.
        opt.param_derivs(self.ff, ffs)
    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
            self.do_svd:
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Setup the residual vector.
        # Deprecated - TR
        # num_d = len(ref_data)
        num_d = 0
        for datatype in r_dict:
            num_d += len(r_dict[datatype])
        resid = np.empty((num_d, 1), dtype=float)
        # Deprecated - TR
        # for i in xrange(0, num_d):
        #     resid[i, 0] = ref_data[i].wht * \
        #         (ref_data[i].val - self.ff.data[i].val)
        count = 0
        for data_type in data_types:
            for r, c in zip(r_dict[data_type], c_dict[data_type]):
                resid[count, 0] = r.wht * (r.val - c.val)
                count += 1
        # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
        logger.log(
            20, ' -- Formed {} residual vector.'.format(resid.shape))
        # Setup the Jacobian.
        num_p = len(self.ff.params)
        # Maybe should be a part of the Jacobian function.
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
        logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape))
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        # We need these for most optimization methods.
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # logger.log(5, 'A:\n{}'.format(ma))
        # logger.log(5, 'b:\n{}'.format(vb))
    # Start coming up with new parameter sets.
    if self.do_newton and not restart:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Moved the derivative section outside of here.
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        # J = U . s . VT
        mu, vs, mvt = return_svd(jacob)
        logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
        logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
        logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
        logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                          self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    # Report how many trial FFs were generated.
    logger.log(
        20, ' -- Generated {} trial force field(s).'.format(
            len(self.new_ffs)))
    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            # Shouldn't need to zero anymore.
            # Deprecated
            # ff.score = compare.compare_data(ref_data, data)
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20,
                '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
            # Copy parameter derivatives from the original FF to save time
            # in case we move onto simplex immediately after this.
            copy_derivs(self.ff, ff)
        else:
            ff = self.ff
    else:
        ff = self.ff
    ff.export_ff(ff.path)
    return ff
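# ---------------------------------------------------------------------------
# Hedged sketch (assumed array shapes, not the source API): how the residual
# vector, Jacobian, and normal equations formed above combine into one
# unconstrained least-squares step. do_lstsq in the source also applies
# radii/cutoffs to the changes; that bookkeeping is omitted here.
import numpy as np

def lstsq_changes(ref_vals, ref_whts, ff_vals, jacob):
    """ref_vals, ref_whts, ff_vals: (num_d,); jacob: (num_d, num_p)."""
    # Weighted residuals, matching resid[i, 0] = wht * (ref - calc).
    resid = (ref_whts * (ref_vals - ff_vals)).reshape(-1, 1)
    ma = jacob.T.dot(jacob)   # matrix A
    vb = jacob.T.dot(resid)   # vector b
    # Solve A x = b for the parameter changes.
    return np.linalg.lstsq(ma, vb, rcond=None)[0].ravel()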
def run(self, r_data=None):
    """
    Once all attributes are set up as you so desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)
    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    # Here we don't actually need the database connection/force field
    # data. We only need the score.
    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # I could store this object on self.ff to prevent garbage
        # collection. That would be nice if simplex was followed by
        # gradient, which needs that information, and if simplex yielded
        # no improvements. At most points in the optimization, this is
        # probably too infrequent for it to be worth the memory, but it
        # might be nice once the parameters are close to convergence.
        data = calculate.main(self.args_ff)
        self.ff.score = compare.compare_data(r_data, data, zero=False)
        logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
    else:
        logger.log(15, ' -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
    ffs = opt.differentiate_ff(self.ff)
    for ff in ffs:
        ff.export_ff(lines=self.ff_lines)
        logger.log(20, ' -- Calculating {}.'.format(ff))
        data = calculate.main(self.args_ff)
        ff.score = compare.compare_data(r_data, data, zero=False)
        opt.pretty_ff_results(ff)
    if self.max_params and len(self.ff.params) > self.max_params:
        simp_params = reduce_num_simp_params(
            self.ff, ffs, max_params=self.max_params)
        self.new_ffs = reduce_num_simp_ffs(ffs, simp_params)
    else:
        self.new_ffs = ffs
    self.new_ffs = sorted(self.new_ffs + [self.ff], key=lambda x: x.score)
    wrapper = textwrap.TextWrapper(width=79)
    logger.log(20, 'ORDERED FF SCORES:')
    logger.log(20, wrapper.fill('{}'.format(
        ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
    # Shows all FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)
    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self.max_cycles_wo_change:
        current_cycle += 1
        last_best = self.new_ffs[0].score
        best_ff = self.new_ffs[0]
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(best_ff.params)
        for i in xrange(0, len(best_ff.params)):
            if self.do_weighted_reflection:
                try:
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - self.new_ffs[-1].score)
                             for x in self.new_ffs[:-1]])
                        / sum([x.score - self.new_ffs[-1].score
                               for x in self.new_ffs[:-1]]))
                except ZeroDivisionError:
                    logger.warning(
                        'Attempted to divide by zero while calculating '
                        'the weighted simplex inversion point. All '
                        'penalty function scores for the trial force '
                        'fields are numerically equivalent.')
                    # Breaking should just exit the while loop. Should
                    # still give you the best force field determined thus
                    # far.
                    break
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # Calculate score for inverted parameters.
        self.ff.export_ff(self.ff.path, params=inv_ff.params)
        data = calculate.main(self.args_ff)
        inv_ff.score = compare.compare_data(r_data, data, zero=False)
        opt.pretty_ff_results(inv_ff)
        # Calculate score for reflected parameters.
        self.ff.export_ff(self.ff.path, params=ref_ff.params)
        data = calculate.main(self.args_ff)
        ref_ff.score = compare.compare_data(r_data, data, zero=False)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < self.new_ffs[0].score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(self.new_ffs[0].params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            self.ff.export_ff(self.ff.path, exp_ff.params)
            data = calculate.main(self.args_ff)
            exp_ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(
                    20, ' -- Expansion succeeded. Keeping expanded '
                    'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(
                    20, ' -- Expansion failed. Keeping reflected '
                    'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, ' -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(self.ff.path, params=con_ff.params)
            data = calculate.main(self.args_ff)
            con_ff.score = compare.compare_data(r_data, data, zero=False)
            opt.pretty_ff_results(con_ff)
            if con_ff.score < self.new_ffs[-2].score:
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in xrange(0, len(best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(self.ff.path, params=ff.params)
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data,
                                                    zero=False)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(20, ' -- Contraction failed.')
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        if self.new_ffs[0].score < last_best:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(20, ' -- {} cycles without change.'.format(
                cycles_wo_change))
        best_ff = self.new_ffs[0]
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
    if best_ff.score < self.ff.score:
        logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
            79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
            79, '~'))
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, ' -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
def run(self, ref_data=None, restart=None):
    """
    Runs the gradient optimization.

    Ensure that the attributes in __init__ are set as you desire before
    using this function.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    # We need reference data if you didn't provide it.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)
    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Is opt.Optimizer.ff_lines used anymore?
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # Not 100% sure if this is necessary, but it certainly doesn't
        # hurt.
        compare.correlate_energies(ref_data, self.ff.data)
    r_dict = compare.data_by_type(ref_data)
    c_dict = compare.data_by_type(self.ff.data)
    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
    if self.ff.score is None:
        # Already zeroed reference and correlated the energies.
        self.ff.score = compare.compare_data(r_dict, c_dict)
    data_types = []
    for typ in r_dict:
        data_types.append(typ)
    data_types.sort()
    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)
    logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
    if restart:
        par_file = restart
        logger.log(20, ' -- Restarting gradient from central '
                   'differentiation file {}.'.format(par_file))
    else:
        # We need a file to hold the differentiated parameter data.
        par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
        if par_files:
            par_files.sort()
            most_recent_par_file = par_files[-1]
            most_recent_par_file = most_recent_par_file.split('/')[-1]
            most_recent_num = most_recent_par_file[9:12]
            num = int(most_recent_num) + 1
            par_file = 'par_diff_{:03d}.txt'.format(num)
        else:
            par_file = 'par_diff_001.txt'
        logger.log(20, ' -- Generating central differentiation '
                   'file {}.'.format(par_file))
        f = open(os.path.join(self.direc, par_file), 'w')
        csv_writer = csv.writer(f)
        # Row 1 - Labels
        # Row 2 - Weights
        # Row 3 - Reference data values
        # Row 4 - Initial FF data values
        ## Deprecated -TR
        # csv_writer.writerow([x.lbl for x in ref_data])
        # csv_writer.writerow([x.wht for x in ref_data])
        # csv_writer.writerow([x.val for x in ref_data])
        # csv_writer.writerow([x.val for x in self.ff.data])
        writerows = [[], [], [], []]
        for data_type in data_types:
            writerows[0].extend([x.lbl for x in r_dict[data_type]])
            writerows[1].extend([x.wht for x in r_dict[data_type]])
            writerows[2].extend([x.val for x in r_dict[data_type]])
            writerows[3].extend([x.val for x in c_dict[data_type]])
        for row in writerows:
            csv_writer.writerow(row)
        logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
        # Save many FFs, each with their own parameter sets.
        ffs = opt.differentiate_ff(self.ff)
        logger.log(20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(
            79, '~'))
        for ff in ffs:
            ff.export_ff(lines=self.ff.lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            # Deprecated
            # ff.score = compare.compare_data(ref_data, data)
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
            # Write the data rather than storing it in memory. For large
            # parameter sets, this could consume GBs of memory otherwise!
            # csv_writer.writerow([x.val for x in data])
            row = []
            for data_type in data_types:
                row.extend([x.val for x in c_data[data_type]])
            csv_writer.writerow(row)
        f.close()
        # Make sure we have derivative information. Used for NR.
        #
        # The derivatives are useful for checking up on the progress of
        # the optimization and for deciding which parameters to use in a
        # subsequent simplex optimization.
        #
        # Still need a way to do this with the restart file.
        opt.param_derivs(self.ff, ffs)
    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
            self.do_svd:
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Setup the residual vector.
        # Deprecated - TR
        # num_d = len(ref_data)
        num_d = 0
        for datatype in r_dict:
            num_d += len(r_dict[datatype])
        resid = np.empty((num_d, 1), dtype=float)
        # Deprecated - TR
        # for i in xrange(0, num_d):
        #     resid[i, 0] = ref_data[i].wht * \
        #         (ref_data[i].val - self.ff.data[i].val)
        count = 0
        for data_type in data_types:
            for r, c in zip(r_dict[data_type], c_dict[data_type]):
                resid[count, 0] = r.wht * (r.val - c.val)
                count += 1
        # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
        logger.log(
            20, ' -- Formed {} residual vector.'.format(resid.shape))
        # Setup the Jacobian.
        num_p = len(self.ff.params)
        # Maybe should be a part of the Jacobian function.
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
        logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape))
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        # We need these for most optimization methods.
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # logger.log(5, 'A:\n{}'.format(ma))
        # logger.log(5, 'b:\n{}'.format(vb))
    # Start coming up with new parameter sets.
    if self.do_newton and not restart:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Moved the derivative section outside of here.
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        # J = U . s . VT
        mu, vs, mvt = return_svd(jacob)
        logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
        logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
        logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
        logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                          self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    # Report how many trial FFs were generated.
    logger.log(20, ' -- Generated {} trial force field(s).'.format(
        len(self.new_ffs)))
    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            # Shouldn't need to zero anymore.
            # Deprecated
            # ff.score = compare.compare_data(ref_data, data)
            c_data = compare.data_by_type(data)
            r_dict, c_data = compare.trim_data(r_dict, c_data)
            ff.score = compare.compare_data(r_dict, c_data)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(
            self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
            # Copy parameter derivatives from the original FF to save time
            # in case we move onto simplex immediately after this.
            copy_derivs(self.ff, ff)
        else:
            ff = self.ff
    else:
        ff = self.ff
    return ff
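# ---------------------------------------------------------------------------
# Hypothetical sketch of what return_jacobian has to do with the par_diff
# CSV written above. Rows 1-4 hold labels, weights, reference values, and
# initial FF values; each later row holds one differentiated FF's data. The
# assumption that forward/backward rows alternate per parameter (and that
# the differentiation step size is handled elsewhere) is mine, not the
# source's.
import csv
import numpy as np

def jacobian_from_par_file(path):
    with open(path) as handle:
        rows = list(csv.reader(handle))
    whts = np.array(rows[1], dtype=float)
    ff_rows = np.array(rows[4:], dtype=float)  # one row per trial FF
    num_p = len(ff_rows) // 2                  # forward/backward pair per parameter
    jacob = np.empty((ff_rows.shape[1], num_p), dtype=float)
    for j in range(num_p):
        fwd, bwd = ff_rows[2 * j], ff_rows[2 * j + 1]
        # Weighted central difference per data point.
        jacob[:, j] = whts * (fwd - bwd) / 2.
    return jacob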
def run(self, ref_data=None):
    # We need reference data if you didn't provide it.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)
    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Check whether this is efficient with the ff_lines.
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # We could do this, but the zeroing of energies has already been
        # done.
        # self.ff.score = compare.compare_data(ref_data, self.ff.data)
        # So instead we do this.
        compare.correlate_energies(ref_data, self.ff.data)
    if self.ff.score is None:
        # Already zeroed reference and correlated the energies.
        self.ff.score = compare.calculate_score(ref_data, self.ff.data)
        logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    # We need a file to hold the differentiated parameter data.
    par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
    if par_files:
        par_files.sort()
        most_recent_par_file = par_files[-1]
        most_recent_par_file = most_recent_par_file.split('/')[-1]
        most_recent_num = most_recent_par_file[9:12]
        num = int(most_recent_num) + 1
        par_file = 'par_diff_{:03d}.txt'.format(num)
    else:
        par_file = 'par_diff_001.txt'
    f = open(os.path.join(self.direc, par_file), 'w')
    csv_writer = csv.writer(f)
    # Row 1 - Labels
    # Row 2 - Weights
    # Row 3 - Reference data values
    # Row 4 - Initial FF data values
    csv_writer.writerow([x.lbl for x in ref_data])
    csv_writer.writerow([x.wht for x in ref_data])
    csv_writer.writerow([x.val for x in ref_data])
    csv_writer.writerow([x.val for x in self.ff.data])
    logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
    # Setup the residual vector.
    # Perhaps move this closer to the Jacobian section.
    num_d = len(ref_data)
    resid = np.empty((num_d, 1), dtype=float)
    for i in xrange(0, num_d):
        resid[i, 0] = ref_data[i].wht * \
            (ref_data[i].val - self.ff.data[i].val)
    logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
    logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape))
    # Save many FFs, each with their own parameter sets.
    ffs = opt.differentiate_ff(self.ff)
    logger.log(20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
    for ff in ffs:
        ff.export_ff(lines=self.ff.lines)
        logger.log(20, ' -- Calculating {}.'.format(ff))
        data = calculate.main(self.args_ff)
        compare.correlate_energies(ref_data, data)
        ff.score = compare.calculate_score(ref_data, data)
        opt.pretty_ff_results(ff)
        # Write the data rather than storing it in memory. For large
        # parameter sets, this could consume GBs of memory otherwise!
        csv_writer.writerow([x.val for x in data])
    f.close()
    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if self.do_lstsq or self.do_lagrange or self.do_levenberg or \
            self.do_svd:
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Setup the Jacobian.
        num_p = len(self.ff.params)
        # Maybe should be a part of the Jacobian function.
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        # We need these for most optimization methods.
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        logger.log(5, 'A:\n{}'.format(ma))
        logger.log(5, 'b:\n{}'.format(vb))
    # Start coming up with new parameter sets.
    if self.do_newton:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Make sure we have derivative information.
        if self.ff.params[0].d1 is None:
            opt.param_derivs(self.ff, ffs)
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        mu, vs, mv = return_svd(ma)
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mv, vb,
                                          self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mv, vb,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    # Report how many trial FFs were generated.
    logger.log(20, ' -- Generated {} trial force field(s).'.format(
        len(self.new_ffs)))
    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            ff.score = compare.compare_data(ref_data, data, zero=False)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(
            self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(
                    79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
        else:
            ff = self.ff
    else:
        ff = self.ff
    return ff
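# ---------------------------------------------------------------------------
# Sketch of the solve behind do_svd_w_thresholds/do_svd_wo_thresholds
# (assumed semantics, not the source implementation): decompose, suppress
# small singular values, and back-substitute. Zeroing the reciprocals of
# small singular values removes poorly determined parameter directions from
# the step.
import numpy as np

def svd_solve(ma, vb, threshold=1e-12):
    mu, vs, mvt = np.linalg.svd(ma)
    vs_inv = np.where(vs > threshold * vs.max(), 1. / vs, 0.)
    # x = V . diag(1/s) . U^T . b
    return mvt.T.dot(np.diag(vs_inv)).dot(mu.T).dot(vb)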
def run(self, r_data=None):
    """
    Once all attributes are set up as you so desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)
    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    # Here we don't actually need the database connection/force field
    # data. We only need the score.
    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # I could store this object on self.ff to prevent garbage
        # collection. That would be nice if simplex was followed by
        # gradient, which needs that information, and if simplex yielded
        # no improvements. At most points in the optimization, this is
        # probably too infrequent for it to be worth the memory, but it
        # might be nice once the parameters are close to convergence.
        data = calculate.main(self.args_ff)
        self.ff.score = compare.compare_data(r_data, data)
        logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
    else:
        logger.log(15, ' -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))
    if self.max_params and len(self.ff.params) > self.max_params:
        if self.ff.params[0].d1:
            logger.log(15, ' -- Reusing existing parameter derivatives.')
            # Don't score, so this really doesn't take much time.
            ffs = opt.differentiate_ff(self.ff, central=False)
        else:
            logger.log(15, ' -- Calculating new parameter derivatives.')
            ffs = opt.differentiate_ff(self.ff, central=True)
            # We have to score to get the derivatives.
            for ff in ffs:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, ' -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
            opt.param_derivs(self.ff, ffs)
            # Only keep the forward differentiated FFs.
            ffs = opt.extract_forward(ffs)
        params = select_simp_params_on_derivs(
            self.ff.params, max_params=self.max_params)
        self.new_ffs = opt.extract_ff_by_params(ffs, params)
        # Reduce number of parameters.
        # Will need an option that's not MM3* specific.
        ff_rows = [x.mm3_row for x in params]
        ff_cols = [x.mm3_col for x in params]
        for ff in self.new_ffs:
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff.params = new_params
        # Make a copy of the original FF that has fewer parameters.
        ff_copy = copy.deepcopy(self.ff)
        new_params = []
        for param in ff.params:
            if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                new_params.append(param)
        ff_copy.params = new_params
    else:
        self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        # Still make that FF copy.
        ff_copy = copy.deepcopy(self.ff)
    # Double check and make sure they're all scored.
    for ff in self.new_ffs:
        if ff.score is None:
            ff.export_ff(lines=self.ff_lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ff)
    self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
    wrapper = textwrap.TextWrapper(width=79)
    logger.log(20, 'ORDERED FF SCORES:')
    logger.log(20, wrapper.fill('{}'.format(
        ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
    # Shows all FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)
    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self.max_cycles_wo_change:
        current_cycle += 1
        last_best = self.new_ffs[0].score
        best_ff = self.new_ffs[0]
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(best_ff.params)
        for i in xrange(0, len(best_ff.params)):
            if self.do_weighted_reflection:
                try:
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - self.new_ffs[-1].score)
                             for x in self.new_ffs[:-1]])
                        / sum([x.score - self.new_ffs[-1].score
                               for x in self.new_ffs[:-1]]))
                except ZeroDivisionError:
                    logger.warning(
                        'Attempted to divide by zero while calculating '
                        'the weighted simplex inversion point. All '
                        'penalty function scores for the trial force '
                        'fields are numerically equivalent.')
                    # Breaking should just exit the while loop. Should
                    # still give you the best force field determined thus
                    # far.
                    break
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # Calculate score for inverted parameters.
        self.ff.export_ff(self.ff.path, params=inv_ff.params)
        data = calculate.main(self.args_ff)
        inv_ff.score = compare.compare_data(r_data, data)
        opt.pretty_ff_results(inv_ff)
        # Calculate score for reflected parameters.
        self.ff.export_ff(self.ff.path, params=ref_ff.params)
        data = calculate.main(self.args_ff)
        ref_ff.score = compare.compare_data(r_data, data)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < self.new_ffs[0].score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(self.new_ffs[0].params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            self.ff.export_ff(self.ff.path, exp_ff.params)
            data = calculate.main(self.args_ff)
            exp_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(
                    20, ' -- Expansion succeeded. Keeping expanded '
                    'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(
                    20, ' -- Expansion failed. Keeping reflected '
                    'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, ' -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(self.ff.path, params=con_ff.params)
            data = calculate.main(self.args_ff)
            con_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(con_ff)
            if con_ff.score < self.new_ffs[-2].score:
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in xrange(0, len(best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(self.ff.path, params=ff.params)
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(20, ' -- Contraction failed.')
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        if self.new_ffs[0].score < last_best:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(
                20, ' -- {} cycles without change.'.format(cycles_wo_change))
        best_ff = self.new_ffs[0]
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
    if best_ff.score < self.ff.score:
        logger.log(
            20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(
            20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, ' -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
def run(self, r_data=None):
    """
    Once all attributes are set up as you so desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)
    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # Could store data on self.ff.data if we wanted. Not necessary for
        # simplex. If simplex yielded no improvements, it would return
        # this FF, and then we might want the data such that we don't have
        # to recalculate it in gradient. Let's hope simplex generally
        # yields improvements.
        data = calculate.main(self.args_ff)
        # Deprecated
        # self.ff.score = compare.compare_data(r_data, data)
        r_dict = compare.data_by_type(r_data)
        c_dict = compare.data_by_type(data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        self.ff.score = compare.compare_data(r_dict, c_dict)
    else:
        logger.log(20, ' -- Reused existing score and data for initial FF.')
    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)
    # Here's what we do if there are too many parameters.
    if self.max_params and len(self.ff.params) > self.max_params:
        logger.log(20, ' -- More parameters than the maximum allowed.')
        logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
        logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
        # Here we select the parameters that have the lowest 2nd
        # derivatives.
        # Could fail when simplex finds improvements but restores other
        # parameters.
        # if self.ff.params[0].d1:
        if None in [x.d1 for x in self.ff.params]:
            logger.log(15, ' -- Calculating new parameter derivatives.')
            # Do central differentiation so we can calculate derivatives.
            # Another option would be to write code to determine
            # derivatives only from forward differentiation.
            ffs = opt.differentiate_ff(self.ff, central=True)
            # We have to score to get the derivatives.
            for ff in ffs:
                ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                logger.log(20, ' -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                # Deprecated
                # ff.score = compare.compare_data(r_data, data)
                r_dict = compare.data_by_type(r_data)
                c_dict = compare.data_by_type(data)
                r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                ff.score = compare.compare_data(r_dict, c_dict)
                opt.pretty_ff_results(ff)
            # Add the derivatives to your original FF.
            opt.param_derivs(self.ff, ffs)
            # Only keep the forward differentiated FFs.
            ffs = opt.extract_forward(ffs)
            logger.log(5, ' -- Keeping {} forward differentiated '
                       'FFs.'.format(len(ffs)))
        else:
            logger.log(15, ' -- Reusing existing parameter derivatives.')
            # Differentiate all parameters forward. Yes, I know this is
            # counter-intuitive because we are going to use only a subset
            # of the forward differentiated FFs. However, this is very
            # computationally inexpensive because we're not scoring them
            # now. We will remove the forward differentiated FFs we don't
            # want before scoring.
            ffs = opt.differentiate_ff(self.ff, central=False)
        # This sorts the parameters based upon their 2nd derivative.
        # It keeps the ones with lowest 2nd derivatives.
        # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
        params = select_simp_params_on_derivs(
            self.ff.params, max_params=self.max_params)
        # From the entire list of forward differentiated FFs, pick
        # out the ones that have the lowest 2nd derivatives.
        self.new_ffs = opt.extract_ff_by_params(ffs, params)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Reduce number of parameters.
        # Will need an option that's not MM3* specific in the future.
        ff_rows = [x.mm3_row for x in params]
        ff_cols = [x.mm3_col for x in params]
        for ff in self.new_ffs:
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff.params = new_params
        # Make a copy of your original FF that has fewer parameters.
        ff_copy = copy.deepcopy(self.ff)
        new_params = []
        for param in ff.params:
            if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                new_params.append(param)
        ff_copy.params = new_params
    else:
        # In this case it's simple. Just forward differentiate each
        # parameter.
        self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Still make that FF copy.
        ff_copy = copy.deepcopy(self.ff)
    # Double check and make sure they're all scored.
    for ff in self.new_ffs:
        if ff.score is None:
            ff.export_ff(path=self.ff.path, lines=self.ff_lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            # Deprecated
            # ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(ff)
    # Add your copy of the original FF to the forward differentiated FFs.
    self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
    # Allow 3 cycles w/o change for each parameter present. Remember that
    # the initial FF was added here, hence the minus one.
    self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
    wrapper = textwrap.TextWrapper(width=79)
    # Shows all FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)
    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self._max_cycles_wo_change:
        current_cycle += 1
        # Save the last best in case some accidental sort goes on.
        # Plus it makes reading the code a little easier.
        last_best_ff = copy.deepcopy(self.new_ffs[0])
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
            ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(last_best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(last_best_ff.params)
        # Need the score difference sum for weighted inversion.
        # Calculate this value before going into the loop.
        if self.do_weighted_reflection:
            # If zero, should break.
            score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                  for x in self.new_ffs[:-1]])
            if score_diff_sum == 0.:
                logger.warning('No difference between force field scores. '
                               'Exiting simplex.')
                # We want to raise opt.OptError such that
                # opt.catch_run_errors will write the best FF obtained
                # thus far.
                raise opt.OptError(
                    'No difference between force field scores. '
                    'Exiting simplex.')
        for i in range(0, len(last_best_ff.params)):
            if self.do_weighted_reflection:
                inv_val = (
                    sum([x.params[i].value *
                         (x.score - self.new_ffs[-1].score)
                         for x in self.new_ffs[:-1]])
                    / score_diff_sum)
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # The inversion point does not need to be scored.
        # Calculate score for reflected parameters.
        ref_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
        data = calculate.main(self.args_ff)
        # Deprecated
        # ref_ff.score = compare.compare_data(r_data, data)
        r_dict = compare.data_by_type(r_data)
        c_dict = compare.data_by_type(data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        ref_ff.score = compare.compare_data(r_dict, c_dict)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < last_best_ff.score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(last_best_ff.params)
            for i in range(0, len(last_best_ff.params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            exp_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
            data = calculate.main(self.args_ff)
            # Deprecated
            # exp_ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            exp_ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(
                    20, ' -- Expansion succeeded. Keeping expanded '
                    'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(
                    20, ' -- Expansion failed. Keeping reflected '
                    'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, ' -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(last_best_ff.params)
            for i in range(0, len(last_best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(params=con_ff.params)
            data = calculate.main(self.args_ff)
            # Deprecated
            # con_ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            con_ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(con_ff)
            # This change was made to reflect the 1998 Q2MM publication.
            # if con_ff.score < self.new_ffs[-1].score:
            if con_ff.score < self.new_ffs[-2].score:
                logger.log(20, ' -- Contraction succeeded.')
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in range(0, len(last_best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(params=ff.params)
                    data = calculate.main(self.args_ff)
                    # Deprecated
                    # ff.score = compare.compare_data(r_data, data)
                    r_dict = compare.data_by_type(r_data)
                    c_dict = compare.data_by_type(data)
                    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                    ff.score = compare.compare_data(r_dict, c_dict)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(
                    20, ' -- Contraction failed. Keeping parameters '
                    'anyway.')
                self.new_ffs[-1] = con_ff
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Keep track of the number of cycles without change. If there's
        # improvement, reset the counter.
        if self.new_ffs[0].score < last_best_ff.score:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(20, ' -- {} cycles without improvement out of {} '
                       'allowed.'.format(
                           cycles_wo_change, self._max_cycles_wo_change))
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
    # This sort is likely unnecessary because it should be done at the end
    # of the last loop cycle, but I put it here just in case.
    self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
    best_ff = self.new_ffs[0]
    if best_ff.score < self.ff.score:
        logger.log(
            20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(
            20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~'))
        # This restores the initial parameters, so no need to use
        # restore_simp_ff here.
        best_ff = self.ff
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, ' -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
def run(self, ref_data=None, restart=None):
    # Gather the reference data if it wasn't provided.
    if ref_data is None:
        ref_data = opt.return_ref_data(self.args_ref)
    # We need the initial FF data.
    if self.ff.data is None:
        logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~'))
        # Check whether this is efficient with the ff_lines.
        self.ff.export_ff()
        self.ff.data = calculate.main(self.args_ff)
        # We could do this, but the zeroing of energies has already been
        # done.
        # self.ff.score = compare.compare_data(ref_data, self.ff.data)
        # So instead we do this.
        compare.correlate_energies(ref_data, self.ff.data)
    if self.ff.score is None:
        # Already zeroed the reference and correlated the energies.
        self.ff.score = compare.calculate_score(ref_data, self.ff.data)
        logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
    logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~'))
    # We need a file to hold the differentiated parameter data.
    logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~'))
    if restart:
        par_file = restart
        logger.log(
            20, ' -- Restarting gradient from central '
            'differentiation file {}.'.format(par_file))
    else:
        par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt'))
        if par_files:
            par_files.sort()
            # Strip the directory and extract the three-digit counter.
            most_recent_par_file = par_files[-1]
            most_recent_par_file = most_recent_par_file.split('/')[-1]
            most_recent_num = most_recent_par_file[9:12]
            num = int(most_recent_num) + 1
            par_file = 'par_diff_{:03d}.txt'.format(num)
        else:
            par_file = 'par_diff_001.txt'
        logger.log(
            20, ' -- Generating central differentiation '
            'file {}.'.format(par_file))
        f = open(os.path.join(self.direc, par_file), 'w')
        csv_writer = csv.writer(f)
        # Row 1 - Labels
        # Row 2 - Weights
        # Row 3 - Reference data values
        # Row 4 - Initial FF data values
        csv_writer.writerow([x.lbl for x in ref_data])
        csv_writer.writerow([x.wht for x in ref_data])
        csv_writer.writerow([x.val for x in ref_data])
        csv_writer.writerow([x.val for x in self.ff.data])
        logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~'))
        # Save many FFs, each with their own parameter sets.
        ffs = opt.differentiate_ff(self.ff)
        logger.log(
            20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~'))
        for ff in ffs:
            ff.export_ff(lines=self.ff.lines)
            logger.log(20, ' -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            compare.correlate_energies(ref_data, data)
            ff.score = compare.calculate_score(ref_data, data)
            opt.pretty_ff_results(ff)
            # Write the data rather than storing it in memory. For large
            # parameter sets, this could otherwise consume GBs of memory!
            csv_writer.writerow([x.val for x in data])
        f.close()
    # Calculate the Jacobian, residual vector, matrix A and vector b.
    # These aren't needed if you're only doing Newton-Raphson.
    if (self.do_lstsq or self.do_lagrange or self.do_levenberg or
            self.do_svd):
        logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~'))
        # Set up the residual vector.
        num_d = len(ref_data)
        resid = np.empty((num_d, 1), dtype=float)
        for i in range(0, num_d):
            resid[i, 0] = ref_data[i].wht * (
                ref_data[i].val - self.ff.data[i].val)
        # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid))
        logger.log(
            20, ' -- Formed {} residual vector.'.format(resid.shape))
        # Set up the Jacobian.
        num_p = len(self.ff.params)
        # Maybe this should be a part of the Jacobian function.
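        # Hedged illustration with hypothetical numbers: a data point
        # with weight 2.0, reference value 1.5 and FF value 1.2
        # contributes a residual entry of 2.0 * (1.5 - 1.2) = 0.6. The
        # Jacobian formed below pairs with this residual vector in the
        # normal equations A = J^T J and b = J^T r.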
        jacob = np.empty((num_d, num_p), dtype=float)
        jacob = return_jacobian(jacob, os.path.join(self.direc, par_file))
        # logger.log(5, 'JACOBIAN:\n{}'.format(jacob))
        logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape))
        # We need these for most optimization methods.
        ma = jacob.T.dot(jacob)
        vb = jacob.T.dot(resid)
        logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-'))
        # logger.log(5, 'A:\n{}'.format(ma))
        # logger.log(5, 'b:\n{}'.format(vb))
    # Start coming up with new parameter sets.
    if self.do_newton and not restart:
        logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~'))
        # Make sure we have derivative information.
        if self.ff.params[0].d1 is None:
            opt.param_derivs(self.ff, ffs)
        changes = do_newton(self.ff.params,
                            radii=self.newton_radii,
                            cutoffs=self.newton_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lstsq:
        logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~'))
        changes = do_lstsq(ma, vb,
                           radii=self.lstsq_radii,
                           cutoffs=self.lstsq_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    if self.do_lagrange:
        logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~'))
        for factor in sorted(self.lagrange_factors):
            changes = do_lagrange(ma, vb, factor,
                                  radii=self.lagrange_radii,
                                  cutoffs=self.lagrange_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_levenberg:
        logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~'))
        for factor in sorted(self.levenberg_factors):
            changes = do_levenberg(ma, vb, factor,
                                   radii=self.levenberg_radii,
                                   cutoffs=self.levenberg_cutoffs)
            cleanup(self.new_ffs, self.ff, changes)
    if self.do_svd:
        logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~'))
        # J = U . s . VT
        mu, vs, mvt = return_svd(jacob)
        logger.log(1, '>>> mu.shape: {}'.format(mu.shape))
        logger.log(1, '>>> vs.shape: {}'.format(vs.shape))
        logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape))
        logger.log(1, '>>> vb.shape: {}'.format(vb.shape))
        if self.svd_factors:
            changes = do_svd_w_thresholds(mu, vs, mvt, resid,
                                          self.svd_factors,
                                          radii=self.svd_radii,
                                          cutoffs=self.svd_cutoffs)
        else:
            changes = do_svd_wo_thresholds(mu, vs, mvt, resid,
                                           radii=self.svd_radii,
                                           cutoffs=self.svd_cutoffs)
        cleanup(self.new_ffs, self.ff, changes)
    # Report how many trial FFs were generated.
    logger.log(
        20, ' -- Generated {} trial force field(s).'.format(
            len(self.new_ffs)))
    # If there are any trials, test them.
    if self.new_ffs:
        logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~'))
        for ff in self.new_ffs:
            data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff)
            # Shouldn't need to zero anymore.
            ff.score = compare.compare_data(ref_data, data)
            opt.pretty_ff_results(ff)
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Check for improvement.
        if self.new_ffs[0].score < self.ff.score:
            ff = self.new_ffs[0]
            logger.log(
                20,
                '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~'))
            opt.pretty_ff_results(self.ff, level=20)
            opt.pretty_ff_results(ff, level=20)
            # Copy parameter derivatives from the original FF to save time
            # in case we move on to simplex immediately after this.
            copy_derivs(self.ff, ff)
        else:
            ff = self.ff
    else:
        ff = self.ff
    return ff
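# The following is a minimal, self-contained sketch (never called by the
# optimizer) of the normal-equations step that the methods above consume.
# It is not the project's do_lstsq: the radii and cutoffs that do_lstsq
# applies to the parameter changes are omitted, and all numbers are
# hypothetical.
def _normal_equations_sketch():
    """Illustrative only: A = J^T J, b = J^T r, then solve A x = b."""
    import numpy as np
    # A 3-datum, 2-parameter Jacobian and a weighted residual vector.
    jacob = np.array([[1.0, 0.0],
                      [0.0, 2.0],
                      [1.0, 1.0]])
    resid = np.array([[0.5], [1.0], [0.0]])
    ma = jacob.T.dot(jacob)             # A = J^T J, shape (2, 2)
    vb = jacob.T.dot(resid)             # b = J^T r, shape (2, 1)
    changes = np.linalg.solve(ma, vb)   # unconstrained parameter step
    return changes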