# Imports assumed from the surrounding module; the names match how they are
# used below. select_simp_params_on_derivs and restore_simp_ff are defined
# elsewhere in this same module.
import copy
import logging
import textwrap

import calculate
import compare
import opt

logger = logging.getLogger(__name__)


def run(self, r_data=None):
    """
    Once all attributes are set up as you desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)

    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # Could store data on self.ff.data if we wanted. Not necessary for
        # simplex. If simplex yielded no improvements, it would return this
        # FF, and then we might want the data so that we don't have to
        # recalculate it in gradient. Let's hope simplex generally yields
        # improvements.
        data = calculate.main(self.args_ff)
        self.ff.score = compare.compare_data(r_data, data)
    else:
        logger.log(20, '  -- Reused existing score and data for initial FF.')

    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)

    if self.max_params and len(self.ff.params) > self.max_params:
        logger.log(20, '  -- More parameters than the maximum allowed.')
        logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
        logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
        # Here we select the parameters that have the lowest 2nd
        # derivatives.
        # THIS IS SCHEDULED FOR CHANGING. THIS IS ACTUALLY NOT A GOOD
        # CRITERION FOR PARAMETER SELECTION.
        if self.ff.params[0].d1:
            logger.log(15, '  -- Reusing existing parameter derivatives.')
            # Differentiate all parameters forward. Yes, I know this is
            # counter-intuitive because we are only going to use a subset
            # of the forward differentiated FFs. However, this is very
            # computationally inexpensive because we're not scoring them
            # now. We will remove the forward differentiated FFs we don't
            # want before scoring.
            ffs = opt.differentiate_ff(self.ff, central=False)
        else:
            logger.log(15, '  -- Calculating new parameter derivatives.')
            # Do central differentiation so we can calculate derivatives.
            # Another option would be to write code to determine
            # derivatives only from forward differentiation.
            ffs = opt.differentiate_ff(self.ff, central=True)
            # We have to score to get the derivatives.
            for ff in ffs:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
            # Add the derivatives to your original FF.
            opt.param_derivs(self.ff, ffs)
            # Only keep the forward differentiated FFs.
            ffs = opt.extract_forward(ffs)
            logger.log(5, '  -- Keeping {} forward differentiated '
                       'FFs.'.format(len(ffs)))
        # This sorts the parameters based upon their 2nd derivative.
        # It keeps the ones with the lowest 2nd derivatives.
        # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
        params = select_simp_params_on_derivs(
            self.ff.params, max_params=self.max_params)
        # From the entire list of forward differentiated FFs, pick out the
        # ones that have the lowest 2nd derivatives.
        self.new_ffs = opt.extract_ff_by_params(ffs, params)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Reduce the number of parameters.
        # Will need an option that's not MM3* specific in the future.
        ff_rows = [x.mm3_row for x in params]
        ff_cols = [x.mm3_col for x in params]
        for ff in self.new_ffs:
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff.params = new_params
        # Make a copy of your original FF that has fewer parameters.
        ff_copy = copy.deepcopy(self.ff)
        new_params = []
        for param in ff_copy.params:
            if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                new_params.append(param)
        ff_copy.params = new_params
    else:
        # In this case it's simple. Just forward differentiate each
        # parameter.
        self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Still make that FF copy.
        ff_copy = copy.deepcopy(self.ff)

    # Double check and make sure they're all scored.
    for ff in self.new_ffs:
        if ff.score is None:
            ff.export_ff(lines=self.ff_lines)
            logger.log(20, '  -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ff)
    # Add your copy of the original FF to the forward differentiated FFs.
    self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
    # Allow 3 cycles w/o change for each parameter present. Remember that
    # the initial FF was added here, hence the minus one.
    self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
    wrapper = textwrap.TextWrapper(width=79)
    # Show all of the FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)

    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self._max_cycles_wo_change:
        current_cycle += 1
        last_best = self.new_ffs[0].score
        best_ff = self.new_ffs[0]
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
            ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))

        # !!! FOR TESTING !!!
        # Write the best and worst FFs to some other directory. Then write
        # the worst FF to the optimization working directory. Then raise
        # opt.OptError. The worst FF should be overwritten by the best FF
        # afterwards.
        # if current_cycle == 5:
        #     self.new_ffs[-1].export_ff(
        #         path='ref_methanol_flds/mm3_worst.fld',
        #         lines=self.ff.lines)
        #     self.new_ffs[0].export_ff(
        #         path='ref_methanol_flds/mm3_best.fld',
        #         lines=self.ff.lines)
        #     self.new_ffs[-1].export_ff(
        #         path='ref_methanol/mm3.fld',
        #         lines=self.ff.lines)
        #     raise opt.OptError
        # !!! END TESTING !!!

        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(best_ff.params)
        # Need the score difference sum for weighted inversion.
        # Calculate this value before going into the loop.
        if self.do_weighted_reflection:
            # If zero, should break.
            score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                  for x in self.new_ffs[:-1]])
            if score_diff_sum == 0.:
                logger.warning(
                    'No difference between force field scores. '
                    'Exiting simplex.')
                # We want to raise opt.OptError such that
                # opt.catch_run_errors will write the best FF obtained thus
                # far.
                raise opt.OptError(
                    'No difference between force field scores. '
                    'Exiting simplex.')
        for i in xrange(0, len(best_ff.params)):
            if self.do_weighted_reflection:
                inv_val = (
                    sum([x.params[i].value *
                         (x.score - self.new_ffs[-1].score)
                         for x in self.new_ffs[:-1]])
                    / score_diff_sum)
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # The inversion point does not need to be scored.
        # Calculate the score for the reflected parameters.
        self.ff.export_ff(self.ff.path, params=ref_ff.params)
        data = calculate.main(self.args_ff)
        ref_ff.score = compare.compare_data(r_data, data)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < self.new_ffs[0].score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(self.new_ffs[0].params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            self.ff.export_ff(self.ff.path, exp_ff.params)
            data = calculate.main(self.args_ff)
            exp_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(20, '  -- Expansion succeeded. Keeping expanded '
                           'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(20, '  -- Expansion failed. Keeping reflected '
                           'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, '  -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(self.ff.path, params=con_ff.params)
            data = calculate.main(self.args_ff)
            con_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(con_ff)
            # This change was made to reflect the 1998 Q2MM publication.
            # if con_ff.score < self.new_ffs[-1].score:
            if con_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Contraction succeeded.')
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in xrange(0, len(best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(self.ff.path, params=ff.params)
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(20, '  -- Contraction failed. Keeping parameters '
                           'anyway.')
                self.new_ffs[-1] = con_ff
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        if self.new_ffs[0].score < last_best:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(20, '  -- {} cycles without improvement out of {} '
                       'allowed.'.format(
                           cycles_wo_change, self._max_cycles_wo_change))
        best_ff = self.new_ffs[0]
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))

    if best_ff.score < self.ff.score:
        logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
            79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
            79, '~'))
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, '  -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
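# Illustrative aside (not part of the module above): the trial-point updates
# in run() follow a Nelder-Mead-style scheme. The hypothetical helpers below
# restate those updates for plain lists of parameter values, using the same
# formulas: inversion (centroid of all vertices except the worst),
# reflection (2 * inv - worst), expansion (3 * inv - 2 * worst), and the two
# contractions used for con_ff.

def _centroid(other_vertices):
    """Unweighted inversion point: the mean of the non-worst vertices."""
    n = len(other_vertices)
    return [sum(vals) / n for vals in zip(*other_vertices)]


def _reflect(inv, worst):
    """Reflection of the worst vertex through the inversion point."""
    return [2 * c - w for c, w in zip(inv, worst)]


def _expand(inv, worst):
    """Expansion past the reflected point, as used for exp_ff."""
    return [3 * c - 2 * w for c, w in zip(inv, worst)]


def _contract(inv, worst, toward_worst):
    """Contraction, as used for con_ff: toward the worst vertex when the
    reflection scored worse than the worst vertex, otherwise toward the
    reflected point."""
    if toward_worst:
        return [(c + w) / 2 for c, w in zip(inv, worst)]
    return [(3 * c - w) / 2 for c, w in zip(inv, worst)]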
def reduce_num_simp_ffs(ffs, params):
    """Keep only the forward differentiated FFs that touch the selected
    parameters."""
    simp_ffs = opt.extract_forward(ffs)
    simp_ffs = opt.extract_ff_by_params(simp_ffs, params)
    return simp_ffs
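# Hypothetical usage of the helper above (trial_ffs is an invented name):
# given all differentiated trial FFs and the parameters selected by their
# second derivatives, keep only the forward-differentiated FFs for those
# parameters.
#
#     trial_ffs = reduce_num_simp_ffs(ffs, params)
#     assert len(trial_ffs) <= len(ffs)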
def run(self, r_data=None):
    """
    Once all attributes are set up as you desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)

    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    # Here we don't actually need the database connection/force field data.
    # We only need the score.
    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # I could store this data object on self.ff to prevent garbage
        # collection. That would be nice if simplex were followed by
        # gradient, which needs that information, and if simplex yielded no
        # improvements. At most points in the optimization this is probably
        # too infrequent to be worth the memory, but it might be nice once
        # the parameters are close to convergence.
        data = calculate.main(self.args_ff)
        self.ff.score = compare.compare_data(r_data, data)
        logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score))
    else:
        logger.log(15, '  -- Reused existing score and data for initial FF.')
        logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score))

    if self.max_params and len(self.ff.params) > self.max_params:
        if self.ff.params[0].d1:
            logger.log(15, '  -- Reusing existing parameter derivatives.')
            # We don't score here, so this really doesn't take much time.
            ffs = opt.differentiate_ff(self.ff, central=False)
        else:
            logger.log(15, '  -- Calculating new parameter derivatives.')
            ffs = opt.differentiate_ff(self.ff, central=True)
            # We have to score to get the derivatives.
            for ff in ffs:
                ff.export_ff(lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                ff.score = compare.compare_data(r_data, data)
                opt.pretty_ff_results(ff)
            opt.param_derivs(self.ff, ffs)
            # Only keep the forward differentiated FFs.
            ffs = opt.extract_forward(ffs)
        params = select_simp_params_on_derivs(
            self.ff.params, max_params=self.max_params)
        self.new_ffs = opt.extract_ff_by_params(ffs, params)
        # Reduce the number of parameters.
        # Will need an option that's not MM3* specific.
        ff_rows = [x.mm3_row for x in params]
        ff_cols = [x.mm3_col for x in params]
        for ff in self.new_ffs:
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff.params = new_params
    else:
        self.new_ffs = opt.differentiate_ff(self.ff, central=False)

    # Double check and make sure they're all scored.
    for ff in self.new_ffs:
        if ff.score is None:
            ff.export_ff(lines=self.ff_lines)
            logger.log(20, '  -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(ff)

    # Make a copy of your original FF. If the parameter set was reduced
    # above, keep only the selected parameters in the copy as well.
    ff_copy = copy.deepcopy(self.ff)
    if self.max_params and len(self.ff.params) > self.max_params:
        new_params = []
        for param in ff_copy.params:
            if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                new_params.append(param)
        ff_copy.params = new_params
    self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)

    wrapper = textwrap.TextWrapper(width=79)
    logger.log(20, 'ORDERED FF SCORES:')
    logger.log(20, wrapper.fill('{}'.format(
        ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
    # Show all of the FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)

    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self.max_cycles_wo_change:
        current_cycle += 1
        last_best = self.new_ffs[0].score
        best_ff = self.new_ffs[0]
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(best_ff.params)
        for i in xrange(0, len(best_ff.params)):
            if self.do_weighted_reflection:
                try:
                    inv_val = (
                        sum([x.params[i].value *
                             (x.score - self.new_ffs[-1].score)
                             for x in self.new_ffs[:-1]])
                        / sum([x.score - self.new_ffs[-1].score
                               for x in self.new_ffs[:-1]]))
                except ZeroDivisionError:
                    logger.warning(
                        'Attempted to divide by zero while calculating the '
                        'weighted simplex inversion point. All penalty '
                        'function scores for the trial force fields are '
                        'numerically equivalent.')
                    # Breaking here only ends this parameter loop; the best
                    # force field determined thus far is still returned at
                    # the end of the method.
                    break
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # Calculate the score for the inverted parameters.
        self.ff.export_ff(self.ff.path, params=inv_ff.params)
        data = calculate.main(self.args_ff)
        inv_ff.score = compare.compare_data(r_data, data)
        opt.pretty_ff_results(inv_ff)
        # Calculate the score for the reflected parameters.
        self.ff.export_ff(self.ff.path, params=ref_ff.params)
        data = calculate.main(self.args_ff)
        ref_ff.score = compare.compare_data(r_data, data)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < self.new_ffs[0].score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(self.new_ffs[0].params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            self.ff.export_ff(self.ff.path, exp_ff.params)
            data = calculate.main(self.args_ff)
            exp_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(20, '  -- Expansion succeeded. Keeping expanded '
                           'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(20, '  -- Expansion failed. Keeping reflected '
                           'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, '  -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(best_ff.params)
            for i in xrange(0, len(best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(self.ff.path, params=con_ff.params)
            data = calculate.main(self.args_ff)
            con_ff.score = compare.compare_data(r_data, data)
            opt.pretty_ff_results(con_ff)
            if con_ff.score < self.new_ffs[-2].score:
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in xrange(0, len(best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(self.ff.path, params=ff.params)
                    data = calculate.main(self.args_ff)
                    ff.score = compare.compare_data(r_data, data)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(20, '  -- Contraction failed.')
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        if self.new_ffs[0].score < last_best:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(20, '  -- {} cycles without change.'.format(
                cycles_wo_change))
        best_ff = self.new_ffs[0]
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))

    if best_ff.score < self.ff.score:
        logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
            79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
            79, '~'))
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, '  -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
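# When do_weighted_reflection is enabled above, the inversion point is a
# weighted rather than plain mean of the non-worst vertices, with weights
# (score - worst_score); since lower scores are better, the best vertices
# pull the centroid hardest. A minimal standalone sketch of that coordinate
# (weighted_inversion_value is an illustrative name, not part of the module):

def weighted_inversion_value(values, scores):
    """Weighted inversion coordinate for one parameter.

    values -- the parameter's value in each simplex vertex, worst last
    scores -- the penalty function score of each vertex, same order
    """
    worst = scores[-1]
    weights = [s - worst for s in scores[:-1]]
    total = sum(weights)
    if total == 0.0:
        # All trial scores are numerically identical; this mirrors the
        # ZeroDivisionError handled in run() above.
        raise ZeroDivisionError('no spread in force field scores')
    return sum(v * w for v, w in zip(values[:-1], weights)) / total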
def run(self, r_data=None):
    """
    Once all attributes are set up as you desire, run this method to
    optimize the parameters.

    Returns
    -------
    `datatypes.FF` (or subclass)
        Contains the best parameters.
    """
    if r_data is None:
        r_data = opt.return_ref_data(self.args_ref)

    if self.ff.score is None:
        logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~'))
        self.ff.export_ff()
        # Could store data on self.ff.data if we wanted. Not necessary for
        # simplex. If simplex yielded no improvements, it would return this
        # FF, and then we might want the data so that we don't have to
        # recalculate it in gradient. Let's hope simplex generally yields
        # improvements.
        data = calculate.main(self.args_ff)
        # deprecated
        # self.ff.score = compare.compare_data(r_data, data)
        r_dict = compare.data_by_type(r_data)
        c_dict = compare.data_by_type(data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        self.ff.score = compare.compare_data(r_dict, c_dict)
    else:
        logger.log(20, '  -- Reused existing score and data for initial FF.')

    logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~'))
    logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score))
    opt.pretty_ff_results(self.ff, level=20)

    # Here's what we do if there are too many parameters.
    if self.max_params and len(self.ff.params) > self.max_params:
        logger.log(20, '  -- More parameters than the maximum allowed.')
        logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params)))
        logger.log(5, 'MAX PARAMS: {}'.format(self.max_params))
        # Here we select the parameters that have the lowest 2nd
        # derivatives.
        # Could fail when simplex finds improvements but restores other
        # parameters.
        # if self.ff.params[0].d1:
        if None in [x.d1 for x in self.ff.params]:
            logger.log(15, '  -- Calculating new parameter derivatives.')
            # Do central differentiation so we can calculate derivatives.
            # Another option would be to write code to determine
            # derivatives only from forward differentiation.
            ffs = opt.differentiate_ff(self.ff, central=True)
            # We have to score to get the derivatives.
            for ff in ffs:
                ff.export_ff(path=self.ff.path, lines=self.ff_lines)
                logger.log(20, '  -- Calculating {}.'.format(ff))
                data = calculate.main(self.args_ff)
                # deprecated
                # ff.score = compare.compare_data(r_data, data)
                r_dict = compare.data_by_type(r_data)
                c_dict = compare.data_by_type(data)
                r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                ff.score = compare.compare_data(r_dict, c_dict)
                opt.pretty_ff_results(ff)
            # Add the derivatives to your original FF.
            opt.param_derivs(self.ff, ffs)
            # Only keep the forward differentiated FFs.
            ffs = opt.extract_forward(ffs)
            logger.log(5, '  -- Keeping {} forward differentiated '
                       'FFs.'.format(len(ffs)))
        else:
            logger.log(15, '  -- Reusing existing parameter derivatives.')
            # Differentiate all parameters forward. Yes, I know this is
            # counter-intuitive because we are only going to use a subset
            # of the forward differentiated FFs. However, this is very
            # computationally inexpensive because we're not scoring them
            # now. We will remove the forward differentiated FFs we don't
            # want before scoring.
            ffs = opt.differentiate_ff(self.ff, central=False)
        # This sorts the parameters based upon their 2nd derivative.
        # It keeps the ones with the lowest 2nd derivatives.
        # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION.
        params = select_simp_params_on_derivs(
            self.ff.params, max_params=self.max_params)
        # From the entire list of forward differentiated FFs, pick out the
        # ones that have the lowest 2nd derivatives.
        self.new_ffs = opt.extract_ff_by_params(ffs, params)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Reduce the number of parameters.
        # Will need an option that's not MM3* specific in the future.
        ff_rows = [x.mm3_row for x in params]
        ff_cols = [x.mm3_col for x in params]
        for ff in self.new_ffs:
            new_params = []
            for param in ff.params:
                if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                    new_params.append(param)
            ff.params = new_params
        # Make a copy of your original FF that has fewer parameters.
        ff_copy = copy.deepcopy(self.ff)
        new_params = []
        for param in ff_copy.params:
            if param.mm3_row in ff_rows and param.mm3_col in ff_cols:
                new_params.append(param)
        ff_copy.params = new_params
    else:
        # In this case it's simple. Just forward differentiate each
        # parameter.
        self.new_ffs = opt.differentiate_ff(self.ff, central=False)
        logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs)))
        # Still make that FF copy.
        ff_copy = copy.deepcopy(self.ff)

    # Double check and make sure they're all scored.
    for ff in self.new_ffs:
        if ff.score is None:
            ff.export_ff(path=self.ff.path, lines=self.ff_lines)
            logger.log(20, '  -- Calculating {}.'.format(ff))
            data = calculate.main(self.args_ff)
            # deprecated
            # ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(ff)
    # Add your copy of the original FF to the forward differentiated FFs.
    self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score)
    # Allow 3 cycles w/o change for each parameter present. Remember that
    # the initial FF was added here, hence the minus one.
    self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1)
    wrapper = textwrap.TextWrapper(width=79)
    # Show all of the FFs' parameters.
    opt.pretty_ff_params(self.new_ffs)

    # Start the simplex cycles.
    current_cycle = 0
    cycles_wo_change = 0
    while current_cycle < self.max_cycles \
            and cycles_wo_change < self._max_cycles_wo_change:
        current_cycle += 1
        # Save the last best in case some accidental sort goes on.
        # Plus it makes reading the code a little easier.
        last_best_ff = copy.deepcopy(self.new_ffs[0])
        logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))
        logger.log(20, 'ORDERED FF SCORES:')
        logger.log(20, wrapper.fill('{}'.format(
            ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs))))
        inv_ff = self.ff.__class__()
        if self.do_weighted_reflection:
            inv_ff.method = 'WEIGHTED INVERSION'
        else:
            inv_ff.method = 'INVERSION'
        inv_ff.params = copy.deepcopy(last_best_ff.params)
        ref_ff = self.ff.__class__()
        ref_ff.method = 'REFLECTION'
        ref_ff.params = copy.deepcopy(last_best_ff.params)
        # Need the score difference sum for weighted inversion.
        # Calculate this value before going into the loop.
        if self.do_weighted_reflection:
            # If zero, should break.
            score_diff_sum = sum([x.score - self.new_ffs[-1].score
                                  for x in self.new_ffs[:-1]])
            if score_diff_sum == 0.:
                logger.warning('No difference between force field scores. '
                               'Exiting simplex.')
                # We want to raise opt.OptError such that
                # opt.catch_run_errors will write the best FF obtained thus
                # far.
                raise opt.OptError(
                    'No difference between force field scores. '
                    'Exiting simplex.')
        for i in range(0, len(last_best_ff.params)):
            if self.do_weighted_reflection:
                inv_val = (
                    sum([x.params[i].value *
                         (x.score - self.new_ffs[-1].score)
                         for x in self.new_ffs[:-1]])
                    / score_diff_sum)
            else:
                inv_val = (
                    sum([x.params[i].value for x in self.new_ffs[:-1]])
                    / len(self.new_ffs[:-1]))
            inv_ff.params[i].value = inv_val
            ref_ff.params[i].value = (
                2 * inv_val - self.new_ffs[-1].params[i].value)
        # The inversion point does not need to be scored.
        # Calculate the score for the reflected parameters.
        ref_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
        data = calculate.main(self.args_ff)
        # deprecated
        # ref_ff.score = compare.compare_data(r_data, data)
        r_dict = compare.data_by_type(r_data)
        c_dict = compare.data_by_type(data)
        r_dict, c_dict = compare.trim_data(r_dict, c_dict)
        ref_ff.score = compare.compare_data(r_dict, c_dict)
        opt.pretty_ff_results(ref_ff)
        if ref_ff.score < last_best_ff.score:
            logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~'))
            exp_ff = self.ff.__class__()
            exp_ff.method = 'EXPANSION'
            exp_ff.params = copy.deepcopy(last_best_ff.params)
            for i in range(0, len(last_best_ff.params)):
                exp_ff.params[i].value = (
                    3 * inv_ff.params[i].value -
                    2 * self.new_ffs[-1].params[i].value)
            exp_ff.export_ff(path=self.ff.path, lines=self.ff.lines)
            data = calculate.main(self.args_ff)
            # deprecated
            # exp_ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            exp_ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(exp_ff)
            if exp_ff.score < ref_ff.score:
                self.new_ffs[-1] = exp_ff
                logger.log(20, '  -- Expansion succeeded. Keeping expanded '
                           'parameters.')
            else:
                self.new_ffs[-1] = ref_ff
                logger.log(20, '  -- Expansion failed. Keeping reflected '
                           'parameters.')
        elif ref_ff.score < self.new_ffs[-2].score:
            logger.log(20, '  -- Keeping reflected parameters.')
            self.new_ffs[-1] = ref_ff
        else:
            logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~'))
            con_ff = self.ff.__class__()
            con_ff.method = 'CONTRACTION'
            con_ff.params = copy.deepcopy(last_best_ff.params)
            for i in range(0, len(last_best_ff.params)):
                if ref_ff.score > self.new_ffs[-1].score:
                    con_val = (
                        (inv_ff.params[i].value +
                         self.new_ffs[-1].params[i].value) / 2)
                else:
                    con_val = (
                        (3 * inv_ff.params[i].value -
                         self.new_ffs[-1].params[i].value) / 2)
                con_ff.params[i].value = con_val
            self.ff.export_ff(params=con_ff.params)
            data = calculate.main(self.args_ff)
            # deprecated
            # con_ff.score = compare.compare_data(r_data, data)
            r_dict = compare.data_by_type(r_data)
            c_dict = compare.data_by_type(data)
            r_dict, c_dict = compare.trim_data(r_dict, c_dict)
            con_ff.score = compare.compare_data(r_dict, c_dict)
            opt.pretty_ff_results(con_ff)
            # This change was made to reflect the 1998 Q2MM publication.
            # if con_ff.score < self.new_ffs[-1].score:
            if con_ff.score < self.new_ffs[-2].score:
                logger.log(20, '  -- Contraction succeeded.')
                self.new_ffs[-1] = con_ff
            elif self.do_massive_contraction:
                logger.log(
                    20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~'))
                for ff_num, ff in enumerate(self.new_ffs[1:]):
                    for i in range(0, len(last_best_ff.params)):
                        ff.params[i].value = (
                            (ff.params[i].value +
                             self.new_ffs[0].params[i].value) / 2)
                    self.ff.export_ff(params=ff.params)
                    data = calculate.main(self.args_ff)
                    # deprecated
                    # ff.score = compare.compare_data(r_data, data)
                    r_dict = compare.data_by_type(r_data)
                    c_dict = compare.data_by_type(data)
                    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
                    ff.score = compare.compare_data(r_dict, c_dict)
                    ff.method += ' MC'
                    opt.pretty_ff_results(ff)
            else:
                logger.log(20, '  -- Contraction failed. Keeping parameters '
                           'anyway.')
                self.new_ffs[-1] = con_ff
        self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
        # Keep track of the number of cycles without change. If there's
        # improvement, reset the counter.
        if self.new_ffs[0].score < last_best_ff.score:
            cycles_wo_change = 0
        else:
            cycles_wo_change += 1
            logger.log(20, '  -- {} cycles without improvement out of {} '
                       'allowed.'.format(
                           cycles_wo_change, self._max_cycles_wo_change))
        logger.log(20, 'BEST:')
        opt.pretty_ff_results(self.new_ffs[0], level=20)
        logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format(
            current_cycle).rjust(79, '~'))

    # This sort is likely unnecessary because it should be done at the end
    # of the last loop cycle, but I put it here just in case.
    self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score)
    best_ff = self.new_ffs[0]
    if best_ff.score < self.ff.score:
        logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(
            79, '~'))
        best_ff = restore_simp_ff(best_ff, self.ff)
    else:
        logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(
            79, '~'))
        # This restores the initial parameters, so there's no need to use
        # restore_simp_ff here.
        best_ff = self.ff
    opt.pretty_ff_results(self.ff, level=20)
    opt.pretty_ff_results(best_ff, level=20)
    logger.log(20, '  -- Writing best force field from simplex.')
    best_ff.export_ff(best_ff.path)
    return best_ff
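# Each trial FF in the newest run() above is scored with the same four-step
# pattern: export the parameters, recalculate the data, bucket the reference
# and calculated data by type, trim the two against each other, and compare.
# A hypothetical helper gathering that pattern (score_trial_ff is not part
# of the module; it only reuses calls that already appear above):

def score_trial_ff(trial_ff, base_ff, args_ff, r_data):
    """Export trial_ff over base_ff's file, recalculate, and score it."""
    trial_ff.export_ff(path=base_ff.path, lines=base_ff.lines)
    data = calculate.main(args_ff)
    r_dict = compare.data_by_type(r_data)
    c_dict = compare.data_by_type(data)
    r_dict, c_dict = compare.trim_data(r_dict, c_dict)
    trial_ff.score = compare.compare_data(r_dict, c_dict)
    return trial_ff.score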