def main(args):
    parser = return_compare_parser()
    opts = parser.parse_args(args)
    r_data = calculate.main(opts.reference.split())
    c_data = calculate.main(opts.calculate.split())
    score = compare_data(r_data, c_data)
    # Pretty readouts. Maybe opts.output could have 3 values:
    # True, False or None
    # Then I wouldn't need 2 if statements here.
    if opts.output or opts.print:
        pretty_data_comp(r_data, c_data, output=opts.output,
                         doprint=opts.print)
    logger.log(1, '>>> score: {}'.format(score))
def main(args):
    logger.log(1, ">>> main <<<")
    parser = return_compare_parser()
    opts = parser.parse_args(args)
    r_data = calculate.main(opts.reference.split())
    c_data = calculate.main(opts.calculate.split())
    score = compare_data(r_data, c_data)
    # Pretty readouts.
    if opts.output:
        pretty_data_comp(r_data, c_data, output=opts.output)
    if opts.print:
        pretty_data_comp(r_data, c_data)
    logger.log(1, ">>> score: {}".format(score))
def main(args):
    parser = return_compare_parser()
    opts = parser.parse_args(args)
    r_data = calculate.main(opts.reference.split())
    c_data = calculate.main(opts.calculate.split())
    score = compare_data(r_data, c_data)
    # Pretty readouts. Maybe opts.output could have 3 values:
    # True, False or None
    # Then I wouldn't need 2 if statements here.
    if opts.output:
        pretty_data_comp(r_data, c_data, output=opts.output)
    if opts.print:
        pretty_data_comp(r_data, c_data)
    logger.log(1, '>>> score: {}'.format(score))
def read_and_calculate():
    with open(args_file, 'r') as file_object:
        next_i = get_next_line()
        if next_i < max_lines:
            bl_first = 1
            for i, line in enumerate(file_object):
                if i == next_i:
                    if bl_first:
                        delay = time.time() - time_start
                        print(f'First calculation started {round(delay, 1)}s '
                              'after launch')
                        bl_first = 0
                    # Get current arguments and simulate
                    cur_args = line.strip()
                    print(f"\nLaunching calculation line #{next_i} "
                          "with parameters '" + cur_args + "'")
                    try:
                        main(cur_args)
                    except Exception as e:
                        print(f"Encountered unhandled exception while "
                              f"calculating line #{next_i} "
                              f"with parameters '" + cur_args + "'.")
                        print('Exception:\n', e)
                        traceback.print_exc()
                        # print('Skipping.')
                        # raise(e)
                    check_stop()
                    next_i = get_next_line()
                    # If the next_i is smaller than the current, it is likely
                    # the calculation has been reset. Need to restart from the
                    # start.
                    if next_i < i:
                        print('Detected arguments file change. '
                              'Reloading the arguments file')
                        read_and_calculate()
            warnings.warn('Unexpected parent exit.')
            sys.exit(EXIT_PARENT_UNEXPECTED)
        else:
            print(f'Position file points to the end of the arguments file '
                  f'({next_i} >= {max_lines}). '
                  'No calculations have been performed.')
        print("Queue empty. Finishing.")
        sys.exit(EXIT_SUCCESS)
def cal_ff(ff, ff_args, parent_ff=None, store_data=False):
    if ff.path:
        path = ff.path
    else:
        path = ff.path = parent_ff.path
    if ff.lines:
        # If I understand Python, then this creates a pointer, not a
        # copy of the object. Storing the lines from mm3.fld is sort of
        # big.
        lines = parent_ff.lines
    ff.export_ff(path, lines=lines)
    data = calculate.main(ff_args)
    if store_data:
        ff.data = data
    return data
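# Minimal illustration of the point raised in the comment above (standard
# Python semantics, not project code): plain assignment binds a new name to
# the same list object, so nothing is duplicated; only copy()/deepcopy()
# actually copy the data.
import copy

parent_lines = ['line 1\n', 'line 2\n']
alias = parent_lines                  # same object, no extra memory used
shallow = copy.copy(parent_lines)     # new list, separate from the original

alias.append('line 3\n')
print(len(parent_lines))  # 3 -- alias and original are the same object
print(len(shallow))       # 2 -- the copy is unaffected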
def setUp(self):
    self.conn = calculate.main(
        ' -d d_rhod -meig X001_E1.01.mae,X001_E1.out'.split())
def return_ref_data(args_ref):
    logger.log(20, '~~ GATHERING REFERENCE DATA ~~'.rjust(79, '~'))
    ref_data = calculate.main(args_ref)
    compare.import_weights(ref_data)
    return ref_data
def run_loop_input(self, lines, score=None):
    lines_iterator = iter(lines)
    while True:
        try:
            line = next(lines_iterator)
        except StopIteration:
            return self.ff
        cols = line.split()
        if cols[0] == 'DIR':
            self.direc = cols[1]
        if cols[0] == 'FFLD':
            # Import FF data.
            if cols[1] == 'read':
                self.ff = datatypes.MM3(os.path.join(self.direc, cols[2]))
                self.ff.import_ff()
                self.ff.method = 'READ'
                with open(os.path.join(self.direc, cols[2]), 'r') as f:
                    self.ff.lines = f.readlines()
            # Export FF data.
            if cols[1] == 'write':
                self.ff.export_ff(os.path.join(self.direc, cols[2]))
        # Trim parameters.
        if cols[0] == 'PARM':
            logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~'))
            self.ff.params = parameters.trim_params_by_file(
                self.ff.params, os.path.join(self.direc, cols[1]))
        if cols[0] == 'LOOP':
            # Read lines that will be looped over.
            inner_loop_lines = []
            line = next(lines_iterator)
            while line.split()[0] != 'END':
                inner_loop_lines.append(line)
                line = next(lines_iterator)
            # Make loop object and populate attributes.
            loop = Loop()
            loop.convergence = float(cols[1])
            loop.direc = self.direc
            loop.ff = self.ff
            loop.args_ff = self.args_ff
            loop.args_ref = self.args_ref
            loop.ref_data = self.ref_data
            loop.loop_lines = inner_loop_lines
            # Log commands.
            pretty_loop_input(
                inner_loop_lines, name='OPTIMIZATION LOOP',
                score=self.ff.score)
            # Run inner loop.
            self.ff = loop.opt_loop()
        # Note: Probably want to update this to append the directory given
        # by the new DIR command.
        if cols[0] == 'RDAT':
            logger.log(
                20, '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~'))
            if len(cols) > 1:
                self.args_ref = ' '.join(cols[1:]).split()
            self.ref_data = opt.return_ref_data(self.args_ref)
        if cols[0] == 'CDAT':
            logger.log(
                20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~'))
            if len(cols) > 1:
                self.args_ff = ' '.join(cols[1:]).split()
            self.ff.data = calculate.main(self.args_ff)
        if cols[0] == 'COMP':
            self.ff.score = compare.compare_data(
                self.ref_data, self.ff.data)
            if '-o' in cols:
                compare.pretty_data_comp(
                    self.ref_data,
                    self.ff.data,
                    os.path.join(self.direc, cols[cols.index('-o') + 1]))
            if '-p' in cols:
                compare.pretty_data_comp(
                    self.ref_data,
                    self.ff.data)
        if cols[0] == 'GRAD':
            grad = gradient.Gradient(
                direc=self.direc,
                ff=self.ff,
                ff_lines=self.ff.lines,
                args_ff=self.args_ff)
            self.ff = grad.run(ref_data=self.ref_data)
        if cols[0] == 'SIMP':
            simp = simplex.Simplex(
                direc=self.direc,
                ff=self.ff,
                ff_lines=self.ff.lines,
                args_ff=self.args_ff)
            self.ff = simp.run(r_data=self.ref_data)
        if cols[0] == 'WGHT':
            data_type = cols[1]
            co.WEIGHTS[data_type] = float(cols[2])
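# A hedged illustration of the kind of loop-input text this method parses.
# The keywords (DIR, FFLD, PARM, RDAT, CDAT, COMP, LOOP, END, GRAD, SIMP,
# WGHT) come from the code above; the directory, file names, flags, and
# numbers are made-up placeholders, not values taken from the project.
EXAMPLE_LOOP_INPUT = """\
DIR d_rhod
FFLD read mm3.fld
PARM params.txt
RDAT -d d_rhod -jb X001_E1.01.mae
CDAT -d d_rhod -mb X001_E1.01.mae
COMP -o comp.txt -p
LOOP 0.01
GRAD
SIMP
END
WGHT Bond 10.0
"""
# It would be fed to the method one line at a time, e.g.
# self.run_loop_input(EXAMPLE_LOOP_INPUT.splitlines()).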
def run_loop_input(self, lines, score=None): lines_iterator = iter(lines) while True: try: line = next(lines_iterator) except StopIteration: return self.ff cols = line.split() if cols[0] == 'DIR': self.direc = cols[1] if cols[0] == 'FFLD': # Import FF data. if cols[1] == 'read': if cols[2] == 'mm3.fld': self.ff = datatypes.MM3(os.path.join(self.direc, cols[2])) if '.prm' in cols[2]: self.ff = datatypes.TinkerFF(os.path.join(self.direc, cols[2])) self.ff.import_ff() self.ff.method = 'READ' with open(os.path.join(self.direc, cols[2]), 'r') as f: self.ff.lines = f.readlines() # Export FF data. if cols[1] == 'write': self.ff.export_ff(os.path.join(self.direc, cols[2])) # Trim parameters. if cols[0] == 'PARM': logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~')) self.ff.params = parameters.trim_params_by_file( self.ff.params, os.path.join(self.direc, cols[1])) if cols[0] == 'LOOP': # Read lines that will be looped over. inner_loop_lines = [] line = next(lines_iterator) while line.split()[0] != 'END': inner_loop_lines.append(line) line = next(lines_iterator) # Make loop object and populate attributes. loop = Loop() loop.convergence = float(cols[1]) loop.direc = self.direc loop.ff = self.ff loop.args_ff = self.args_ff loop.args_ref = self.args_ref loop.ref_data = self.ref_data loop.loop_lines = inner_loop_lines # Log commands. pretty_loop_input( inner_loop_lines, name='OPTIMIZATION LOOP', score=self.ff.score) # Run inner loop. self.ff = loop.opt_loop() # Note: Probably want to update this to append the directory given # by the new DIR command. if cols[0] == 'RDAT': logger.log( 20, '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~')) if len(cols) > 1: self.args_ref = ' '.join(cols[1:]).split() self.ref_data = opt.return_ref_data(self.args_ref) if cols[0] == 'CDAT': logger.log( 20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~')) if len(cols) > 1: self.args_ff = ' '.join(cols[1:]).split() self.ff.data = calculate.main(self.args_ff) if cols[0] == 'COMP': # Deprecated # self.ff.score = compare.compare_data( # self.ref_data, self.ff.data) # if '-o' in cols: # compare.pretty_data_comp( # self.ref_data, # self.ff.data, # os.path.join(self.direc, cols[cols.index('-o') + 1])) # if '-p' in cols: # compare.pretty_data_comp( # self.ref_data, # self.ff.data, # doprint=True) output = False doprint = False r_dict = compare.data_by_type(self.ref_data) c_dict = compare.data_by_type(self.ff.data) r_dict, c_dict = compare.trim_data(r_dict,c_dict) if '-o' in cols: output = os.path.join(self.direc, cols[cols.index('-o') +1]) if '-p' in cols: doprint = True self.ff.score = compare.compare_data( r_dict, c_dict, output=output, doprint=doprint) if cols[0] == 'GRAD': grad = gradient.Gradient( direc=self.direc, ff=self.ff, ff_lines=self.ff.lines, args_ff=self.args_ff) #### Should probably just write a function instead of looping #### this for every gradient method. This includes everything #### between the two lines of #. 
TR 20180112 ############################################################## for col in cols[1:]: if "lstsq" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_lstsq=True elif arg == False: grad.do_lstsq=False if 'radii' in arg: grad.lstsq_radii = [] radii_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if radii_vals == "None": grad.lstsq_radii = None else: for val in radii_vals: grad.lstsq_radii.append(float(val)) if 'cutoff' in arg: grad.lstsq_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if cutoff_vals == "None": grad.lstsq_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.lstsq_cutoff.append(float(val)) elif "newton" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_newton=True elif arg == False: grad.do_newton=False if 'radii' in arg: grad.newton_radii = [] radii_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if radii_vals=='None': grad.newton_radii = None else: for val in radii_vals: grad.newton_radii.append(float(val)) if 'cutoff' in arg: grad.newton_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if cutoff_vals=='None': grad.newton_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.newton_cutoff.append(float(val)) elif "levenberg" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_levenberg=True elif arg == False: grad.do_levenberg=False if 'radii' in arg: grad.levenberg_radii = [] radii_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if radii_vals=='None': grad.levenberg_radii = None else: for val in radii_vals: grad.levenberg_radii.append(float(val)) if 'cutoff' in arg: grad.levenberg_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if cutoff_vals=='None': grad.levenberg_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.levenberg_cutoff.append(float(val)) if 'factor' in arg: grad.levenberg_cutoff = [] factor_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if factor_vals=='None': grad.levenberg_factor = None else: for val in factor_vals: grad.levenberg_factor.append(float(val)) elif "lagrange" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_lagrange=True elif arg == False: grad.do_lagrange=False if 'radii' in arg: grad.lagrange_radii = [] radii_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if radii_vals=='None': grad.lagrange_radii = None else: for val in radii_vals: grad.lagrange_radii.append(float(val)) if 'cutoff' in arg: grad.lagrange_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if cutoff_vals=='None': grad.lagrange_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.lagrange_cutoff.append(float(val)) if 'factor' in arg: grad.lagrange_factors = [] factor_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if factor_vals=='None': grad.lagrange_factors = None else: for val in factor_vals: grad.lagrange_factors.append(float(val)) elif "svd" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == 
"True": grad.do_svd=True elif arg == False: grad.do_svd=False if 'radii' in arg: grad.svd_radii = [] radii_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if radii_vals=='None': grad.svd_radii = None else: for val in radii_vals: grad.svd_radii.append(float(val)) if 'cutoff' in arg: grad.svd_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if cutoff_vals=='None': grad.svd_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.svd_cutoff.append(float(val)) if 'factor' in arg: grad.svd_cutoff = [] factor_vals = re.search( r"\[(.+)\]",arg).group(1).split('/') if factor_vals=='None': grad.svd_factor = None else: for val in factor_vals: grad.svd_factor.append(float(val)) else: raise Exception("'{}' : Not Recognized".format(col)) ############################################################## self.ff = grad.run(ref_data=self.ref_data) if cols[0] == 'SIMP': simp = simplex.Simplex( direc=self.direc, ff=self.ff, ff_lines=self.ff.lines, args_ff=self.args_ff) for col in cols[1:]: if "max_params" in col: simp.max_params = col.split('=')[1] else: raise Exception("'{}' : Not Recognized".format(col)) self.ff = simp.run(r_data=self.ref_data) if cols[0] == 'WGHT': data_type = cols[1] co.WEIGHTS[data_type] = float(cols[2]) if cols[0] == 'STEP': param_type = cols[1] co.STEPS[param_type] = float(cols[2])
def func_EQL():
    data = T1.get(1.0, tk.END)
    res = calculate.main(data)
    T1.delete(1.0, tk.END)
    T1.insert(1.0, res)
def run(self, ref_data=None, restart=None): # We need reference data if you didn't provide it. if ref_data is None: ref_data = opt.return_ref_data(self.args_ref) # We need the initial FF data. if self.ff.data is None: logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~')) # Check whether this is efficient with the ff_lines. self.ff.export_ff() self.ff.data = calculate.main(self.args_ff) # We could do this, but the zeroing of energies has already been # done. # self.ff.score = compare.compare_data(ref_data, self.ff.data) # So instead we do this. compare.correlate_energies(ref_data, self.ff.data) if self.ff.score is None: # Already zeroed reference and correlated the energies. self.ff.score = compare.calculate_score(ref_data, self.ff.data) logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score)) logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~')) # We need a file to hold the differentiated parameter data. logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~')) if restart: par_file = restart logger.log( 20, ' -- Restarting gradient from central ' 'differentiation file {}.'.format(par_file)) else: par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt')) if par_files: par_files.sort() most_recent_par_file = par_files[-1] most_recent_par_file = most_recent_par_file.split('/')[-1] most_recent_num = most_recent_par_file[9:12] num = int(most_recent_num) + 1 par_file = 'par_diff_{:03d}.txt'.format(num) else: par_file = 'par_diff_001.txt' logger.log( 20, ' -- Generating central differentiation ' 'file {}.'.format(par_file)) f = open(os.path.join(self.direc, par_file), 'w') csv_writer = csv.writer(f) # Row 1 - Labels # Row 2 - Weights # Row 3 - Reference data values # Row 4 - Initial FF data values csv_writer.writerow([x.lbl for x in ref_data]) csv_writer.writerow([x.wht for x in ref_data]) csv_writer.writerow([x.val for x in ref_data]) csv_writer.writerow([x.val for x in self.ff.data]) logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~')) # Save many FFs, each with their own parameter sets. ffs = opt.differentiate_ff(self.ff) logger.log( 20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~')) for ff in ffs: ff.export_ff(lines=self.ff.lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) compare.correlate_energies(ref_data, data) ff.score = compare.calculate_score(ref_data, data) opt.pretty_ff_results(ff) # Write the data rather than storing it in memory. For large parameter # sets, this could consume GBs of memory otherwise! csv_writer.writerow([x.val for x in data]) f.close() # Calculate the Jacobian, residual vector, matrix A and vector b. # These aren't needed if you're only doing Newton-Raphson. if self.do_lstsq or self.do_lagrange or self.do_levenberg or \ self.do_svd: logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~')) # Setup the residual vector. num_d = len(ref_data) resid = np.empty((num_d, 1), dtype=float) for i in xrange(0, num_d): resid[i, 0] = ref_data[i].wht * (ref_data[i].val - self.ff.data[i].val) # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid)) logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape)) # Setup the Jacobian. num_p = len(self.ff.params) # Maybe should be a part of the Jacobian function. 
jacob = np.empty((num_d, num_p), dtype=float) jacob = return_jacobian(jacob, os.path.join(self.direc, par_file)) # logger.log(5, 'JACOBIAN:\n{}'.format(jacob)) logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape)) ma = jacob.T.dot(jacob) vb = jacob.T.dot(resid) # We need these for most optimization methods. logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-')) # logger.log(5, 'A:\n{}'.format(ma)) # logger.log(5, 'b:\n{}'.format(vb)) # Start coming up with new parameter sets. if self.do_newton and not restart: logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~')) # Make sure we have derivative information. if self.ff.params[0].d1 is None: opt.param_derivs(self.ff, ffs) changes = do_newton(self.ff.params, radii=self.newton_radii, cutoffs=self.newton_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lstsq: logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~')) changes = do_lstsq(ma, vb, radii=self.lstsq_radii, cutoffs=self.lstsq_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lagrange: logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~')) for factor in sorted(self.lagrange_factors): changes = do_lagrange(ma, vb, factor, radii=self.lagrange_radii, cutoffs=self.lagrange_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_levenberg: logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~')) for factor in sorted(self.levenberg_factors): changes = do_levenberg(ma, vb, factor, radii=self.levenberg_radii, cutoffs=self.levenberg_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_svd: logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~')) # J = U . s . VT mu, vs, mvt = return_svd(jacob) logger.log(1, '>>> mu.shape: {}'.format(mu.shape)) logger.log(1, '>>> vs.shape: {}'.format(vs.shape)) logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape)) logger.log(1, '>>> vb.shape: {}'.format(vb.shape)) if self.svd_factors: changes = do_svd_w_thresholds(mu, vs, mvt, resid, self.svd_factors, radii=self.svd_radii, cutoffs=self.svd_cutoffs) else: changes = do_svd_wo_thresholds(mu, vs, mvt, resid, radii=self.svd_radii, cutoffs=self.svd_cutoffs) cleanup(self.new_ffs, self.ff, changes) # Report how many trial FFs were generated. logger.log( 20, ' -- Generated {} trial force field(s).'.format( len(self.new_ffs))) # If there are any trials, test them. if self.new_ffs: logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~')) for ff in self.new_ffs: data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff) # Shouldn't need to zero anymore. ff.score = compare.compare_data(ref_data, data) opt.pretty_ff_results(ff) self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) # Check for improvement. if self.new_ffs[0].score < self.ff.score: ff = self.new_ffs[0] logger.log( 20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(ff, level=20) # Copy parameter derivatives from original FF to save time in # case we move onto simplex immediately after this. copy_derivs(self.ff, ff) else: ff = self.ff else: ff = self.ff return ff
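# A minimal numpy sketch (toy numbers, not project code) of what "matrix A and
# vector b" above are used for: with Jacobian J and weighted residual vector r,
# the Gauss-Newton normal equations A dp = b, where A = J^T J and b = J^T r,
# give the least-squares parameter changes that methods like do_lstsq apply.
import numpy as np

jacob = np.array([[1.0, 0.5],
                  [0.2, 1.5],
                  [0.7, 0.3]])            # num_data x num_params
resid = np.array([[0.4], [0.1], [0.3]])   # weighted (reference - FF) values

ma = jacob.T.dot(jacob)               # A = J^T J
vb = jacob.T.dot(resid)               # b = J^T r
changes = np.linalg.solve(ma, vb)     # one change per parameter
print(changes.ravel())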
def run_loop_input(self, lines, score=None):
    lines_iterator = iter(lines)
    while True:
        try:
            line = next(lines_iterator)
        except StopIteration:
            return self.ff
        cols = line.split()
        if cols[0] == 'DIR':
            self.direc = cols[1]
        if cols[0] == 'FFLD':
            # Import FF data.
            if cols[1] == 'read':
                self.ff = datatypes.MM3(os.path.join(self.direc, cols[2]))
                self.ff.import_ff()
                self.ff.method = 'READ'
                with open(os.path.join(self.direc, cols[2]), 'r') as f:
                    self.ff.lines = f.readlines()
            # Export FF data.
            if cols[1] == 'write':
                self.ff.export_ff(os.path.join(self.direc, cols[2]))
        # Trim parameters.
        if cols[0] == 'PARM':
            logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~'))
            self.ff.params = parameters.trim_params_by_file(
                self.ff.params, os.path.join(self.direc, cols[1]))
        if cols[0] == 'LOOP':
            # Read lines that will be looped over.
            inner_loop_lines = []
            line = next(lines_iterator)
            while line.split()[0] != 'END':
                inner_loop_lines.append(line)
                line = next(lines_iterator)
            # Make loop object and populate attributes.
            loop = Loop()
            loop.convergence = float(cols[1])
            loop.direc = self.direc
            loop.ff = self.ff
            loop.args_ff = self.args_ff
            loop.args_ref = self.args_ref
            loop.ref_data = self.ref_data
            loop.loop_lines = inner_loop_lines
            # Log commands.
            pretty_loop_input(inner_loop_lines, name='OPTIMIZATION LOOP',
                              score=self.ff.score)
            # Run inner loop.
            self.ff = loop.opt_loop()
        # Note: Probably want to update this to append the directory given
        # by the new DIR command.
        if cols[0] == 'RDAT':
            logger.log(20, '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~'))
            if len(cols) > 1:
                self.args_ref = ' '.join(cols[1:]).split()
            self.ref_data = opt.return_ref_data(self.args_ref)
        if cols[0] == 'CDAT':
            logger.log(20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~'))
            if len(cols) > 1:
                self.args_ff = ' '.join(cols[1:]).split()
            self.ff.data = calculate.main(self.args_ff)
        if cols[0] == 'COMP':
            self.ff.score = compare.compare_data(self.ref_data, self.ff.data)
            if '-o' in cols:
                compare.pretty_data_comp(
                    self.ref_data, self.ff.data,
                    os.path.join(self.direc, cols[cols.index('-o') + 1]))
            if '-p' in cols:
                compare.pretty_data_comp(self.ref_data, self.ff.data)
        if cols[0] == 'GRAD':
            grad = gradient.Gradient(direc=self.direc, ff=self.ff,
                                     ff_lines=self.ff.lines,
                                     args_ff=self.args_ff)
            self.ff = grad.run(ref_data=self.ref_data)
        if cols[0] == 'SIMP':
            simp = simplex.Simplex(direc=self.direc, ff=self.ff,
                                   ff_lines=self.ff.lines,
                                   args_ff=self.args_ff)
            self.ff = simp.run(r_data=self.ref_data)
        if cols[0] == 'WGHT':
            data_type = cols[1]
            co.WEIGHTS[data_type] = float(cols[2])
def run(self, r_data=None): """ Once all attributes are setup as you so desire, run this method to optimize the parameters. Returns ------- `datatypes.FF` (or subclass) Contains the best parameters. """ if r_data is None: r_data = opt.return_ref_data(self.args_ref) if self.ff.score is None: logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~')) self.ff.export_ff() # Could store data on self.ff.data if we wanted. Not necessary for # simplex. If simplex yielded no improvements, it would return this # FF, and then we might want the data such taht we don't have to # recalculate it in gradient. Let's hope simplex generally yields # improvements. data = calculate.main(self.args_ff) #deprecated #self.ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) self.ff.score = compare.compare_data(r_dict, c_dict) else: logger.log(20, ' -- Reused existing score and data for initial FF.') logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~')) logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score)) opt.pretty_ff_results(self.ff, level=20) # Here's what we do if there are too many parameters. if self.max_params and len(self.ff.params) > self.max_params: logger.log(20, ' -- More parameters than the maximum allowed.') logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params))) logger.log(5, 'MAX PARAMS: {}'.format(self.max_params)) # Here we select the parameters that have the lowest 2nd # derivatives. # Could fail when simplex finds improvements but restores other # parameters. # if self.ff.params[0].d1: if None in [x.d1 for x in self.ff.params]: logger.log(15, ' -- Calculating new parameter derivatives.') # Do central differentiation so we can calculate derivatives. # Another option would be to write code to determine # derivatives only from forward differentiation. ffs = opt.differentiate_ff(self.ff, central=True) # We have to score to get the derivatives. for ff in ffs: ff.export_ff(path=self.ff.path, lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) #deprecated #ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) ff.score = compare.compare_data(r_dict, c_dict) opt.pretty_ff_results(ff) # Add the derivatives to your original FF. opt.param_derivs(self.ff, ffs) # Only keep the forward differentiated FFs. ffs = opt.extract_forward(ffs) logger.log( 5, ' -- Keeping {} forward differentiated ' 'FFs.'.format(len(ffs))) else: logger.log(15, ' -- Reusing existing parameter derivatives.') # Differentiate all parameters forward. Yes, I know this is # counter-intuitive because we are going to only use subset of # the forward differentiated FFs. However, this is very # computationally inexpensive because we're not scoring them # now. We will remove the forward differentiated FFs we don't # want before scoring. ffs = opt.differentiate_ff(self.ff, central=False) # This sorts the parameters based upon their 2nd derivative. # It keeps the ones with lowest 2nd derivatives. # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION. params = select_simp_params_on_derivs(self.ff.params, max_params=self.max_params) # From the entire list of forward differentiated FFs, pick # out the ones that have the lowest 2nd derivatives. 
self.new_ffs = opt.extract_ff_by_params(ffs, params) logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs))) # Reduce number of parameters. # Will need an option that's not MM3* specific in the future. ff_rows = [x.mm3_row for x in params] ff_cols = [x.mm3_col for x in params] for ff in self.new_ffs: new_params = [] for param in ff.params: if param.mm3_row in ff_rows and param.mm3_col in ff_cols: new_params.append(param) ff.params = new_params # Make a copy of your original FF that has less parameters. ff_copy = copy.deepcopy(self.ff) new_params = [] for param in ff.params: if param.mm3_row in ff_rows and param.mm3_col in ff_cols: new_params.append(param) ff_copy.params = new_params else: # In this case it's simple. Just forward differentiate each # parameter. self.new_ffs = opt.differentiate_ff(self.ff, central=False) logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs))) # Still make that FF copy. ff_copy = copy.deepcopy(self.ff) # Double check and make sure they're all scored. for ff in self.new_ffs: if ff.score is None: ff.export_ff(path=self.ff.path, lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) #deprecated #ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) ff.score = compare.compare_data(r_dict, c_dict) opt.pretty_ff_results(ff) # Add your copy of the orignal to FF to the forward differentiated FFs. self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score) # Allow 3 cycles w/o change for each parameter present. Remember that # the initial FF was added here, hence the minus one. self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1) wrapper = textwrap.TextWrapper(width=79) # Shows all FFs parameters. opt.pretty_ff_params(self.new_ffs) # Start the simplex cycles. current_cycle = 0 cycles_wo_change = 0 while current_cycle < self.max_cycles \ and cycles_wo_change < self._max_cycles_wo_change: current_cycle += 1 # Save the last best in case some accidental sort goes on. # Plus it makes reading the code a litle easier. last_best_ff = copy.deepcopy(self.new_ffs[0]) logger.log( 20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust( 79, '~')) logger.log(20, 'ORDERED FF SCORES:') logger.log( 20, wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs)))) inv_ff = self.ff.__class__() if self.do_weighted_reflection: inv_ff.method = 'WEIGHTED INVERSION' else: inv_ff.method = 'INVERSION' inv_ff.params = copy.deepcopy(last_best_ff.params) ref_ff = self.ff.__class__() ref_ff.method = 'REFLECTION' ref_ff.params = copy.deepcopy(last_best_ff.params) # Need score difference sum for weighted inversion. # Calculate this value before going into loop. if self.do_weighted_reflection: # If zero, should break. score_diff_sum = sum([ x.score - self.new_ffs[-1].score for x in self.new_ffs[:-1] ]) if score_diff_sum == 0.: logger.warning('No difference between force field scores. ' 'Exiting simplex.') # We want to raise opt.OptError such that # opt.catch_run_errors will write the best FF obtained thus # far. raise opt.OptError( 'No difference between force field scores. 
' 'Exiting simplex.') for i in range(0, len(last_best_ff.params)): if self.do_weighted_reflection: inv_val = (sum([ x.params[i].value * (x.score - self.new_ffs[-1].score) for x in self.new_ffs[:-1] ]) / score_diff_sum) else: inv_val = ( sum([x.params[i].value for x in self.new_ffs[:-1]]) / len(self.new_ffs[:-1])) inv_ff.params[i].value = inv_val ref_ff.params[i].value = (2 * inv_val - self.new_ffs[-1].params[i].value) # The inversion point does not need to be scored. # Calculate score for reflected parameters. ref_ff.export_ff(path=self.ff.path, lines=self.ff.lines) data = calculate.main(self.args_ff) #deprecated #ref_ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) ref_ff.score = compare.compare_data(r_dict, c_dict) opt.pretty_ff_results(ref_ff) if ref_ff.score < last_best_ff.score: logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~')) exp_ff = self.ff.__class__() exp_ff.method = 'EXPANSION' exp_ff.params = copy.deepcopy(last_best_ff.params) for i in range(0, len(last_best_ff.params)): exp_ff.params[i].value = ( 3 * inv_ff.params[i].value - 2 * self.new_ffs[-1].params[i].value) exp_ff.export_ff(path=self.ff.path, lines=self.ff.lines) data = calculate.main(self.args_ff) #deprecated #exp_ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) exp_ff.score = compare.compare_data(r_dict, c_dict) opt.pretty_ff_results(exp_ff) if exp_ff.score < ref_ff.score: self.new_ffs[-1] = exp_ff logger.log( 20, ' -- Expansion succeeded. Keeping expanded ' 'parameters.') else: self.new_ffs[-1] = ref_ff logger.log( 20, ' -- Expansion failed. Keeping reflected parameters.') elif ref_ff.score < self.new_ffs[-2].score: logger.log(20, ' -- Keeping reflected parameters.') self.new_ffs[-1] = ref_ff else: logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~')) con_ff = self.ff.__class__() con_ff.method = 'CONTRACTION' con_ff.params = copy.deepcopy(last_best_ff.params) for i in range(0, len(last_best_ff.params)): if ref_ff.score > self.new_ffs[-1].score: con_val = ((inv_ff.params[i].value + self.new_ffs[-1].params[i].value) / 2) else: con_val = ((3 * inv_ff.params[i].value - self.new_ffs[-1].params[i].value) / 2) con_ff.params[i].value = con_val self.ff.export_ff(params=con_ff.params) data = calculate.main(self.args_ff) #deprecated #con_ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) con_ff.score = compare.compare_data(r_dict, c_dict) opt.pretty_ff_results(con_ff) # This change was made to reflect the 1998 Q2MM publication. 
# if con_ff.score < self.new_ffs[-1].score: if con_ff.score < self.new_ffs[-2].score: logger.log(20, ' -- Contraction succeeded.') self.new_ffs[-1] = con_ff elif self.do_massive_contraction: logger.log( 20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~')) for ff_num, ff in enumerate(self.new_ffs[1:]): for i in range(0, len(last_best_ff.params)): ff.params[i].value = ( (ff.params[i].value + self.new_ffs[0].params[i].value) / 2) self.ff.export_ff(params=ff.params) data = calculate.main(self.args_ff) #deprecated #ff.score = compare.compare_data(r_data, data) r_dict = compare.data_by_type(r_data) c_dict = compare.data_by_type(data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) ff.score = compare.compare_data(r_dict, c_dict) ff.method += ' MC' opt.pretty_ff_results(ff) else: logger.log( 20, ' -- Contraction failed. Keeping parmaeters ' 'anyway.') self.new_ffs[-1] = con_ff self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) # Keep track of the number of cycles without change. If there's # improvement, reset the counter. if self.new_ffs[0].score < last_best_ff.score: cycles_wo_change = 0 else: cycles_wo_change += 1 logger.log( 20, ' -- {} cycles without improvement out of {} ' 'allowed.'.format(cycles_wo_change, self._max_cycles_wo_change)) logger.log(20, 'BEST:') opt.pretty_ff_results(self.new_ffs[0], level=20) logger.log( 20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust( 79, '~')) # This sort is likely unnecessary because it should be done at the end # of the last loop cycle, but I put it here just in case. self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) best_ff = self.new_ffs[0] if best_ff.score < self.ff.score: logger.log( 20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~')) best_ff = restore_simp_ff(best_ff, self.ff) else: logger.log( 20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~')) # This restores the inital parameters, so no need to use # restore_simp_ff here. best_ff = self.ff opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(best_ff, level=20) logger.log(20, ' -- Writing best force field from simplex.') best_ff.export_ff(best_ff.path) return best_ff
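# A small illustrative sketch (toy numbers, not project code) of the simplex
# vertex updates used above, written for a single parameter value. "inv" is
# the inversion (centroid) point and "worst" is that parameter's value in the
# worst-scoring force field.
inv = 2.0     # centroid of all vertices except the worst
worst = 3.0   # parameter value of the worst vertex

reflection = 2 * inv - worst               # mirror worst through the centroid
expansion = 3 * inv - 2 * worst            # push further past the centroid
outer_contraction = (3 * inv - worst) / 2  # halfway to the reflection point
inner_contraction = (inv + worst) / 2      # halfway back toward the worst vertex

print(reflection, expansion, outer_contraction, inner_contraction)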
#prepare.main(taggedfolder, segmentfolder, datafolder, segmentlength, stoplistfile, featuretype)


# =================================
# Calculate
# =================================

"""
This module performs the actual distinctiveness measure for each feature.
The calculation can be based on relative or binary features.
The calculation can work in several ways: by division, subtraction
as well as with or without applying some log transformation.
"""

contrast = ["subgenre", "detective", "historical"]  # category, group1, group2; or: "random", "one", "two"
logaddition = 0.5  # has effect on log calculation.
calculate.main(datafolder, metadatafile, contrast, logaddition,
               resultsfolder, segmentlength, featuretype)


# =================================
# Visualize
# =================================

"""
This module provides several plotting functionalities.
"zetabarchart" shows the n words with the most extreme, negative and positive, scores.
"typescatterplot" provides a scatterplot in which each dot is one feature.
"""

# This is for a horizontal barchart for plotting Zeta and similar scores per feature.
numfeatures = 20
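# Hedged sketch of the two basic variants the docstring above mentions (not
# the pipeline's own implementation): a subtraction-based and a division-based
# distinctiveness score, the latter log-transformed with the "logaddition"
# constant guarding against division by zero and log(0).
import math

def distinctiveness(p_group1, p_group2, logaddition=0.5, mode='subtraction'):
    """p_group1/p_group2: a feature's (relative or binary) score in each group."""
    if mode == 'subtraction':
        return p_group1 - p_group2
    if mode == 'division':
        return math.log((p_group1 + logaddition) / (p_group2 + logaddition))
    raise ValueError(mode)

print(distinctiveness(0.8, 0.2))                   # subtraction-based
print(distinctiveness(0.8, 0.2, mode='division'))  # log-ratio-based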
def setUp(self):
    self.conn = calculate.main(' -d d_rhod -mq X001_E1.01.mae'.split())
def setUp(self):
    self.conn = calculate.main(
        ' -d d_rhod -meo X001_E1.01.mae X001_E2.02.mae X001_Z1.02.mae '
        'X001_Z2.02.mae'.split())
def setUp(self):
    self.f_conn = calculate.main(' -d d_rhod -mb X001_E1.01.mae'.split())
    self.r_conn = calculate.main(' -d d_rhod -jb X001_E1.01.mae'.split())
def setUp(self):
    self.conn = calculate.main(
        ' -d d_rhod -jeige X001_E1.01.in,X001_E1.out'.split())
def run(self, r_data=None): """ Once all attributes are setup as you so desire, run this method to optimize the parameters. Returns ------- `datatypes.FF` (or subclass) Contains the best parameters. """ if r_data is None: r_data = opt.return_ref_data(self.args_ref) logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~')) # Here we don't actually need the database connection/force field data. # We only need the score. if self.ff.score is None: logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~')) self.ff.export_ff() # I could store this object to prevent on self.ff to prevent garbage # collection. Would be nice if simplex was followed by gradient, # which needs that information, and if simplex yielded no # improvements. At most points in the optimization, this is probably # too infrequent for it to be worth the memory, but it might be nice # once the parameters are close to convergence. data = calculate.main(self.args_ff) self.ff.score = compare.compare_data(r_data, data) logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score)) else: logger.log(15, ' -- Reused existing score and data for initial FF.') logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score)) if self.max_params and len(self.ff.params) > self.max_params: if self.ff.params[0].d1: logger.log(15, ' -- Reusing existing parameter derivatives.') # Don't score so this really doesn't take much time. ffs = opt.differentiate_ff(self.ff, central=False) else: logger.log(15, ' -- Calculating new parameter derivatives.') ffs = opt.differentiate_ff(self.ff, central=True) # We have to score to get the derivatives. for ff in ffs: ff.export_ff(lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(ff) opt.param_derivs(self.ff, ffs) # Only keep the forward differentiated FFs. ffs = opt.extract_forward(ffs) params = select_simp_params_on_derivs(self.ff.params, max_params=self.max_params) self.new_ffs = opt.extract_ff_by_params(ffs, params) # Reduce number of parameters. # Will need an option that's not MM3* specific. ff_rows = [x.mm3_row for x in params] ff_cols = [x.mm3_col for x in params] for ff in self.new_ffs: new_params = [] for param in ff.params: if param.mm3_row in ff_rows and param.mm3_col in ff_cols: new_params.append(param) ff.params = new_params else: self.new_ffs = opt.differentiate_ff(self.ff, central=False) # Double check and make sure they're all scored. for ff in self.new_ffs: if ff.score is None: ff.export_ff(lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(ff) ff_copy = copy.deepcopy(self.ff) new_params = [] for param in ff.params: if param.mm3_row in ff_rows and param.mm3_col in ff_cols: new_params.append(param) ff_copy.params = new_params self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score) wrapper = textwrap.TextWrapper(width=79) logger.log(20, 'ORDERED FF SCORES:') logger.log( 20, wrapper.fill('{}'.format(' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs)))) # Shows all FFs parameters. opt.pretty_ff_params(self.new_ffs) # Start the simplex cycles. 
current_cycle = 0 cycles_wo_change = 0 while current_cycle < self.max_cycles \ and cycles_wo_change < self.max_cycles_wo_change: current_cycle += 1 last_best = self.new_ffs[0].score best_ff = self.new_ffs[0] logger.log( 20, '~~ START SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust( 79, '~')) inv_ff = self.ff.__class__() if self.do_weighted_reflection: inv_ff.method = 'WEIGHTED INVERSION' else: inv_ff.method = 'INVERSION' inv_ff.params = copy.deepcopy(best_ff.params) ref_ff = self.ff.__class__() ref_ff.method = 'REFLECTION' ref_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(best_ff.params)): if self.do_weighted_reflection: try: inv_val = (sum([ x.params[i].value * (x.score - self.new_ffs[-1].score) for x in self.new_ffs[:-1] ]) / sum([ x.score - self.new_ffs[-1].score for x in self.new_ffs[:-1] ])) except ZeroDivisionError: logger.warning( 'Attempted to divide by zero while calculating the ' 'weighted simplex inversion point. All penalty ' 'function scores for the trial force fields are ' 'numerically equivalent.') # Breaking should just exit the while loop. Should still # give you the best force field determined thus far. break else: inv_val = ( sum([x.params[i].value for x in self.new_ffs[:-1]]) / len(self.new_ffs[:-1])) inv_ff.params[i].value = inv_val ref_ff.params[i].value = (2 * inv_val - self.new_ffs[-1].params[i].value) # Calculate score for inverted parameters. self.ff.export_ff(self.ff.path, params=inv_ff.params) data = calculate.main(self.args_ff) inv_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(inv_ff) # Calculate score for reflected parameters. self.ff.export_ff(self.ff.path, params=ref_ff.params) data = calculate.main(self.args_ff) ref_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(ref_ff) if ref_ff.score < self.new_ffs[0].score: logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~')) exp_ff = self.ff.__class__() exp_ff.method = 'EXPANSION' exp_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(self.new_ffs[0].params)): exp_ff.params[i].value = ( 3 * inv_ff.params[i].value - 2 * self.new_ffs[-1].params[i].value) self.ff.export_ff(self.ff.path, exp_ff.params) data = calculate.main(self.args_ff) exp_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(exp_ff) if exp_ff.score < ref_ff.score: self.new_ffs[-1] = exp_ff logger.log( 20, ' -- Expansion succeeded. Keeping expanded ' 'parameters.') else: self.new_ffs[-1] = ref_ff logger.log( 20, ' -- Expansion failed. 
Keeping reflected parameters.') elif ref_ff.score < self.new_ffs[-2].score: logger.log(20, ' -- Keeping reflected parameters.') self.new_ffs[-1] = ref_ff else: logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~')) con_ff = self.ff.__class__() con_ff.method = 'CONTRACTION' con_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(best_ff.params)): if ref_ff.score > self.new_ffs[-1].score: con_val = ((inv_ff.params[i].value + self.new_ffs[-1].params[i].value) / 2) else: con_val = ((3 * inv_ff.params[i].value - self.new_ffs[-1].params[i].value) / 2) con_ff.params[i].value = con_val self.ff.export_ff(self.ff.path, params=con_ff.params) data = calculate.main(self.args_ff) con_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(con_ff) if con_ff.score < self.new_ffs[-2].score: self.new_ffs[-1] = con_ff elif self.do_massive_contraction: logger.log( 20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~')) for ff_num, ff in enumerate(self.new_ffs[1:]): for i in xrange(0, len(best_ff.params)): ff.params[i].value = ( (ff.params[i].value + self.new_ffs[0].params[i].value) / 2) self.ff.export_ff(self.ff.path, params=ff.params) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data) ff.method += ' MC' opt.pretty_ff_results(ff) else: logger.log(20, ' -- Contraction failed.') self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) if self.new_ffs[0].score < last_best: cycles_wo_change = 0 else: cycles_wo_change += 1 logger.log( 20, ' -- {} cycles without change.'.format(cycles_wo_change)) best_ff = self.new_ffs[0] logger.log(20, 'BEST:') opt.pretty_ff_results(self.new_ffs[0], level=20) logger.log( 20, '~~ END SIMPLEX CYCLE {} ~~'.format(current_cycle).rjust( 79, '~')) if best_ff.score < self.ff.score: logger.log( 20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~')) best_ff = restore_simp_ff(best_ff, self.ff) else: logger.log( 20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust(79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(best_ff, level=20) logger.log(20, ' -- Writing best force field from simplex.') best_ff.export_ff(best_ff.path) return best_ff
def run(self, ref_data=None, restart=None): """ Runs the gradient optimization. Ensure that the attributes in __init__ are set as you desire before using this function. Returns ------- `datatypes.FF` (or subclass) Contains the best parameters. """ # We need reference data if you didn't provide it. if ref_data is None: ref_data = opt.return_ref_data(self.args_ref) # We need the initial FF data. if self.ff.data is None: logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~')) # Is opt.Optimizer.ff_lines used anymore? self.ff.export_ff() self.ff.data = calculate.main(self.args_ff) # Not 100% sure if this is necessary, but it certainly doesn't hurt. compare.correlate_energies(ref_data, self.ff.data) r_dict = compare.data_by_type(ref_data) c_dict = compare.data_by_type(self.ff.data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) if self.ff.score is None: # Already zeroed reference and correlated the energies. self.ff.score = compare.compare_data(r_dict, c_dict) data_types = [] for typ in r_dict: data_types.append(typ) data_types.sort() logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~')) logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score)) opt.pretty_ff_results(self.ff, level=20) logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~')) if restart: par_file = restart logger.log( 20, ' -- Restarting gradient from central ' 'differentiation file {}.'.format(par_file)) else: # We need a file to hold the differentiated parameter data. par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt')) if par_files: par_files.sort() most_recent_par_file = par_files[-1] most_recent_par_file = most_recent_par_file.split('/')[-1] most_recent_num = most_recent_par_file[9:12] num = int(most_recent_num) + 1 par_file = 'par_diff_{:03d}.txt'.format(num) else: par_file = 'par_diff_001.txt' logger.log( 20, ' -- Generating central differentiation ' 'file {}.'.format(par_file)) f = open(os.path.join(self.direc, par_file), 'w') csv_writer = csv.writer(f) # Row 1 - Labels # Row 2 - Weights # Row 3 - Reference data values # Row 4 - Initial FF data values ## Deprecated -TR #csv_writer.writerow([x.lbl for x in ref_data]) #csv_writer.writerow([x.wht for x in ref_data]) #csv_writer.writerow([x.val for x in ref_data]) #csv_writer.writerow([x.val for x in self.ff.data]) writerows = [[], [], [], []] for data_type in data_types: writerows[0].extend([x.lbl for x in r_dict[data_type]]) writerows[1].extend([x.wht for x in r_dict[data_type]]) writerows[2].extend([x.val for x in r_dict[data_type]]) writerows[3].extend([x.val for x in c_dict[data_type]]) for row in writerows: csv_writer.writerow(row) logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~')) # Save many FFs, each with their own parameter sets. ffs = opt.differentiate_ff(self.ff) logger.log( 20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~')) for ff in ffs: ff.export_ff(lines=self.ff.lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) # Deprecated #ff.score = compare.compare_data(ref_data, data) c_data = compare.data_by_type(data) r_dict, c_data = compare.trim_data(r_dict, c_data) ff.score = compare.compare_data(r_dict, c_data) opt.pretty_ff_results(ff) # Write the data rather than storing it in memory. For large # parameter sets, this could consume GBs of memory otherwise! 
#csv_writer.writerow([x.val for x in data]) row = [] for data_type in data_types: row.extend([x.val for x in c_data[data_type]]) csv_writer.writerow(row) f.close() # Make sure we have derivative information. Used for NR. # # The derivatives are useful for checking up on the progress of the # optimization and for deciding which parameters to use in a # subsequent simplex optimization. # # Still need a way to do this with the resatrt file. opt.param_derivs(self.ff, ffs) # Calculate the Jacobian, residual vector, matrix A and vector b. # These aren't needed if you're only doing Newton-Raphson. if self.do_lstsq or self.do_lagrange or self.do_levenberg or \ self.do_svd: logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~')) # Setup the residual vector. # Deprecated - TR #num_d = len(ref_data) num_d = 0 for datatype in r_dict: num_d += len(r_dict[datatype]) resid = np.empty((num_d, 1), dtype=float) # Deprecated - TR #for i in xrange(0, num_d): # resid[i, 0] = ref_data[i].wht * \ # (ref_data[i].val - self.ff.data[i].val) count = 0 for data_type in data_types: for r, c in zip(r_dict[data_type], c_dict[data_type]): resid[count, 0] = r.wht * (r.val - c.val) count += 1 # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid)) logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape)) # Setup the Jacobian. num_p = len(self.ff.params) # Maybe should be a part of the Jacobian function. jacob = np.empty((num_d, num_p), dtype=float) jacob = return_jacobian(jacob, os.path.join(self.direc, par_file)) # logger.log(5, 'JACOBIAN:\n{}'.format(jacob)) logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape)) ma = jacob.T.dot(jacob) vb = jacob.T.dot(resid) # We need these for most optimization methods. logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-')) # logger.log(5, 'A:\n{}'.format(ma)) # logger.log(5, 'b:\n{}'.format(vb)) # Start coming up with new parameter sets. if self.do_newton and not restart: logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~')) # Moved the derivative section outside of here. changes = do_newton(self.ff.params, radii=self.newton_radii, cutoffs=self.newton_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lstsq: logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~')) changes = do_lstsq(ma, vb, radii=self.lstsq_radii, cutoffs=self.lstsq_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lagrange: logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~')) for factor in sorted(self.lagrange_factors): changes = do_lagrange(ma, vb, factor, radii=self.lagrange_radii, cutoffs=self.lagrange_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_levenberg: logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~')) for factor in sorted(self.levenberg_factors): changes = do_levenberg(ma, vb, factor, radii=self.levenberg_radii, cutoffs=self.levenberg_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_svd: logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~')) # J = U . s . VT mu, vs, mvt = return_svd(jacob) logger.log(1, '>>> mu.shape: {}'.format(mu.shape)) logger.log(1, '>>> vs.shape: {}'.format(vs.shape)) logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape)) logger.log(1, '>>> vb.shape: {}'.format(vb.shape)) if self.svd_factors: changes = do_svd_w_thresholds(mu, vs, mvt, resid, self.svd_factors, radii=self.svd_radii, cutoffs=self.svd_cutoffs) else: changes = do_svd_wo_thresholds(mu, vs, mvt, resid, radii=self.svd_radii, cutoffs=self.svd_cutoffs) cleanup(self.new_ffs, self.ff, changes) # Report how many trial FFs were generated. 
logger.log( 20, ' -- Generated {} trial force field(s).'.format( len(self.new_ffs))) # If there are any trials, test them. if self.new_ffs: logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~')) for ff in self.new_ffs: data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff) # Shouldn't need to zero anymore. # Deprecated #ff.score = compare.compare_data(ref_data, data) c_data = compare.data_by_type(data) r_dict, c_data = compare.trim_data(r_dict, c_data) ff.score = compare.compare_data(r_dict, c_data) opt.pretty_ff_results(ff) self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) # Check for improvement. if self.new_ffs[0].score < self.ff.score: ff = self.new_ffs[0] logger.log( 20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust(79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(ff, level=20) # Copy parameter derivatives from original FF to save time in # case we move onto simplex immediately after this. copy_derivs(self.ff, ff) else: ff = self.ff else: ff = self.ff ff.export_ff(ff.path) return ff
# Ways to execute a .py file:
#   Run it from the command line or with the python interpreter: execute the
#   file directly, i.e. run it as a script.
#   Import the file.
# Either way it is the same .py file:
#   Run directly, the file acts as a script.
#   Imported, the file acts as a module.

# import re
# import time
#
# import my_module
# import calculate
#
# ret = calculate.main('1*2+3')
# print(ret)

# A .py file:
#   Used as a script: it provides a feature on its own and handles its own
#   interaction.
#   Used as a module: its feature can be imported and called, but it does not
#   interact on its own.
# The __name__ variable in a file:
#   When the file is executed as a script, __name__ == '__main__'.
#   When the file is imported as a module, __name__ == '<module name>'.
import calculate
print(calculate.main('1+2'))
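# calculate.py -- a minimal sketch of the pattern the comments above describe.
# The real calculate module's internals are not shown here; eval() is only a
# toy stand-in for its parsing logic.
def main(expression):
    return eval(expression)

if __name__ == '__main__':
    # Runs only when this file is executed as a script, not when imported.
    print(main('1*2+3'))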
def run(self, r_data=None): """ Once all attributes are setup as you so desire, run this method to optimize the parameters. Returns ------- `datatypes.FF` (or subclass) Contains the best parameters. """ if r_data is None: r_data = opt.return_ref_data(self.args_ref) logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~')) # Here we don't actually need the database connection/force field data. # We only need the score. if self.ff.score is None: logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~')) self.ff.export_ff() # I could store this object to prevent on self.ff to prevent garbage # collection. Would be nice if simplex was followed by gradient, # which needs that information, and if simplex yielded no # improvements. At most points in the optimization, this is probably # too infrequent for it to be worth the memory, but it might be nice # once the parameters are close to convergence. data = calculate.main(self.args_ff) self.ff.score = compare.compare_data(r_data, data, zero=False) logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.score)) else: logger.log(15, ' -- Reused existing score and data for initial FF.') logger.log(15, 'INIT FF SCORE: {}'.format(self.ff.score)) ffs = opt.differentiate_ff(self.ff) for ff in ffs: ff.export_ff(lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data, zero=False) opt.pretty_ff_results(ff) if self.max_params and len(self.ff.params) > self.max_params: simp_params = reduce_num_simp_params( self.ff, ffs, max_params=self.max_params) self.new_ffs = reduce_num_simp_ffs( ffs, simp_params) else: self.new_ffs = ffs self.new_ffs = sorted(self.new_ffs + [self.ff], key=lambda x: x.score) wrapper = textwrap.TextWrapper(width=79) logger.log(20, 'ORDERED FF SCORES:') logger.log(20, wrapper.fill('{}'.format( ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs)))) # Shows all FFs parameters. opt.pretty_ff_params(self.new_ffs) # Start the simplex cycles. current_cycle = 0 cycles_wo_change = 0 while current_cycle < self.max_cycles \ and cycles_wo_change < self.max_cycles_wo_change: current_cycle += 1 last_best = self.new_ffs[0].score best_ff = self.new_ffs[0] logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format( current_cycle).rjust(79, '~')) inv_ff = self.ff.__class__() if self.do_weighted_reflection: inv_ff.method = 'WEIGHTED INVERSION' else: inv_ff.method = 'INVERSION' inv_ff.params = copy.deepcopy(best_ff.params) ref_ff = self.ff.__class__() ref_ff.method = 'REFLECTION' ref_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(best_ff.params)): if self.do_weighted_reflection: try: inv_val = ( sum([x.params[i].value * (x.score - self.new_ffs[-1].score) for x in self.new_ffs[:-1]]) / sum([x.score - self.new_ffs[-1].score for x in self.new_ffs[:-1]])) except ZeroDivisionError: logger.warning( 'Attempted to divide by zero while calculating the ' 'weighted simplex inversion point. All penalty ' 'function scores for the trial force fields are ' 'numerically equivalent.') # Breaking should just exit the while loop. Should still # give you the best force field determined thus far. break else: inv_val = ( sum([x.params[i].value for x in self.new_ffs[:-1]]) / len(self.new_ffs[:-1])) inv_ff.params[i].value = inv_val ref_ff.params[i].value = ( 2 * inv_val - self.new_ffs[-1].params[i].value) # Calculate score for inverted parameters. 
self.ff.export_ff(self.ff.path, params=inv_ff.params) data = calculate.main(self.args_ff) inv_ff.score = compare.compare_data(r_data, data, zero=False) opt.pretty_ff_results(inv_ff) # Calculate score for reflected parameters. self.ff.export_ff(self.ff.path, params=ref_ff.params) data = calculate.main(self.args_ff) ref_ff.score = compare.compare_data(r_data, data, zero=False) opt.pretty_ff_results(ref_ff) if ref_ff.score < self.new_ffs[0].score: logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~')) exp_ff = self.ff.__class__() exp_ff.method = 'EXPANSION' exp_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(self.new_ffs[0].params)): exp_ff.params[i].value = ( 3 * inv_ff.params[i].value - 2 * self.new_ffs[-1].params[i].value) self.ff.export_ff(self.ff.path, exp_ff.params) data = calculate.main(self.args_ff) exp_ff.score = compare.compare_data(r_data, data, zero=False) opt.pretty_ff_results(exp_ff) if exp_ff.score < ref_ff.score: self.new_ffs[-1] = exp_ff logger.log( 20, ' -- Expansion succeeded. Keeping expanded ' 'parameters.') else: self.new_ffs[-1] = ref_ff logger.log( 20, ' -- Expansion failed. Keeping reflected parameters.') elif ref_ff.score < self.new_ffs[-2].score: logger.log(20, ' -- Keeping reflected parameters.') self.new_ffs[-1] = ref_ff else: logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~')) con_ff = self.ff.__class__() con_ff.method = 'CONTRACTION' con_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(best_ff.params)): if ref_ff.score > self.new_ffs[-1].score: con_val = ( (inv_ff.params[i].value + self.new_ffs[-1].params[i].value) / 2) else: con_val = ( (3 * inv_ff.params[i].value - self.new_ffs[-1].params[i].value) / 2) con_ff.params[i].value = con_val self.ff.export_ff(self.ff.path, params=con_ff.params) data = calculate.main(self.args_ff) con_ff.score = compare.compare_data(r_data, data, zero=False) opt.pretty_ff_results(con_ff) if con_ff.score < self.new_ffs[-2].score: self.new_ffs[-1] = con_ff elif self.do_massive_contraction: logger.log( 20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~')) for ff_num, ff in enumerate(self.new_ffs[1:]): for i in xrange(0, len(best_ff.params)): ff.params[i].value = ( (ff.params[i].value + self.new_ffs[0].params[i].value) / 2) self.ff.export_ff(self.ff.path, params=ff.params) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data, zero=False) ff.method += ' MC' opt.pretty_ff_results(ff) else: logger.log(20, ' -- Contraction failed.') self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) if self.new_ffs[0].score < last_best: cycles_wo_change = 0 else: cycles_wo_change += 1 logger.log(20, ' -- {} cycles without change.'.format( cycles_wo_change)) best_ff = self.new_ffs[0] logger.log(20, 'BEST:') opt.pretty_ff_results(self.new_ffs[0], level=20) logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format( current_cycle).rjust(79, '~')) if best_ff.score < self.ff.score: logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust( 79, '~')) best_ff = restore_simp_ff(best_ff, self.ff) else: logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust( 79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(best_ff, level=20) logger.log(20, ' -- Writing best force field from simplex.') best_ff.export_ff(best_ff.path) return best_ff
def main_op(self, tst_str, mock_stdout): # Feed the scripted responses in tst_str to input() and capture whatever calculate.main() prints. with mock.patch('builtins.input', side_effect=tst_str): calculate.main() return mock_stdout.getvalue()
def run(self, ref_data=None): # We need reference data if you didn't provide it. if ref_data is None: ref_data = opt.return_ref_data(self.args_ref) # We need the initial FF data. if self.ff.data is None: logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~')) # Check whether this is efficient with the ff_lines. self.ff.export_ff() self.ff.data = calculate.main(self.args_ff) # We could do this, but the zeroing of energies has already been # done. # self.ff.score = compare.compare_data(ref_data, self.ff.data) # So instead we do this. compare.correlate_energies(ref_data, self.ff.data) if self.ff.score is None: # Already zeroed reference and correlated the energies. self.ff.score = compare.calculate_score(ref_data, self.ff.data) logger.log(20, 'INITIAL FF SCORE: {}'.format(self.ff.fscore)) logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~')) # We need a file to hold the differentiated parameter data. par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt')) if par_files: par_files.sort() most_recent_par_file = par_files[-1] most_recent_par_file = most_recent_par_file.split('/')[-1] most_recent_num = most_recent_par_file[9:12] num = int(most_recent_num) + 1 par_file = 'par_diff_{:03d}.txt'.format(num) else: par_file = 'par_diff_001.txt' f = open(os.path.join(self.direc, par_file), 'w') csv_writer = csv.writer(f) # Row 1 - Labels # Row 2 - Weights # Row 3 - Reference data values # Row 4 - Initial FF data values csv_writer.writerow([x.lbl for x in ref_data]) csv_writer.writerow([x.wht for x in ref_data]) csv_writer.writerow([x.val for x in ref_data]) csv_writer.writerow([x.val for x in self.ff.data]) logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~')) # Setup the residual vector. # Perhaps move this closer to the Jacobian section. num_d = len(ref_data) resid = np.empty((num_d, 1), dtype=float) for i in xrange(0, num_d): resid[i, 0] = ref_data[i].wht * (ref_data[i].val - self.ff.data[i].val) logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid)) logger.log(20, ' -- Formed {} residual vector.'.format(resid.shape)) # Save many FFs, each with their own parameter sets. ffs = opt.differentiate_ff(self.ff) logger.log(20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust(79, '~')) for ff in ffs: ff.export_ff(lines=self.ff.lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) compare.correlate_energies(ref_data, data) ff.score = compare.calculate_score(ref_data, data) opt.pretty_ff_results(ff) # Write the data rather than storing it in memory. For large parameter # sets, this could consume GBs of memory otherwise! csv_writer.writerow([x.val for x in data]) f.close() # Calculate the Jacobian, residual vector, matrix A and vector b. # These aren't needed if you're only doing Newton-Raphson. if self.do_lstsq or self.do_lagrange or self.do_levenberg or \ self.do_svd: logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~')) # Setup the Jacobian. num_p = len(self.ff.params) # Maybe should be a part of the Jacobian function. jacob = np.empty((num_d, num_p), dtype=float) jacob = return_jacobian(jacob, os.path.join(self.direc, par_file)) ma = jacob.T.dot(jacob) vb = jacob.T.dot(resid) # We need these for most optimization methods. logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-')) logger.log(5, 'A:\n{}'.format(ma)) logger.log(5, 'b:\n{}'.format(vb)) # Start coming up with new parameter sets. if self.do_newton: logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~')) # Make sure we have derivative information. 
if self.ff.params[0].d1 is None: opt.param_derivs(self.ff, ffs) changes = do_newton(self.ff.params, radii=self.newton_radii, cutoffs=self.newton_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lstsq: logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~')) changes = do_lstsq(ma, vb, radii=self.lstsq_radii, cutoffs=self.lstsq_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lagrange: logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~')) for factor in sorted(self.lagrange_factors): changes = do_lagrange(ma, vb, factor, radii=self.lagrange_radii, cutoffs=self.lagrange_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_levenberg: logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~')) for factor in sorted(self.levenberg_factors): changes = do_levenberg(ma, vb, factor, radii=self.levenberg_radii, cutoffs=self.levenberg_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_svd: logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~')) mu, vs, mv = return_svd(ma) if self.svd_factors: changes = do_svd_w_thresholds(mu, vs, mv, vb, self.svd_factors, radii=self.svd_radii, cutoffs=self.svd_cutoffs) else: changes = do_svd_wo_thresholds(mu, vs, mv, vb, radii=self.svd_radii, cutoffs=self.svd_cutoffs) cleanup(self.new_ffs, self.ff, changes) # Report how many trial FFs were generated. logger.log(20, ' -- Generated {} trial force field(s).'.format( len(self.new_ffs))) # If there are any trials, test them. if self.new_ffs: logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~')) for ff in self.new_ffs: data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff) ff.score = compare.compare_data(ref_data, data, zero=False) opt.pretty_ff_results(ff) self.new_ffs = sorted( self.new_ffs, key=lambda x: x.score) # Check for improvement. if self.new_ffs[0].score < self.ff.score: ff = self.new_ffs[0] logger.log( 20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust( 79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(ff, level=20) else: ff = self.ff else: ff = self.ff return ff
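# --- Hedged sketch (not part of the original code): the linear algebra behind the
# do_lstsq branch above. The run() method builds A = J^T J and b = J^T r from the
# Jacobian J and the weighted residual vector r; the least-squares parameter change
# solves A dp = b. Function and variable names here are illustrative.
import numpy as np

def lstsq_parameter_change(jacob, resid):
    """Return the parameter change dp that solves (J^T J) dp = J^T r."""
    ma = jacob.T.dot(jacob)   # matrix A, shape (num_params, num_params)
    vb = jacob.T.dot(resid)   # vector b, shape (num_params, 1)
    # lstsq is used rather than solve so a rank-deficient A does not raise.
    dp, residuals, rank, singular_values = np.linalg.lstsq(ma, vb, rcond=None)
    return dp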
def run(self, ref_data=None, restart=None): """ Runs the gradient optimization. Ensure that the attributes in __init__ are set as you desire before using this function. Returns ------- `datatypes.FF` (or subclass) Contains the best parameters. """ # We need reference data if you didn't provide it. if ref_data is None: ref_data = opt.return_ref_data(self.args_ref) # We need the initial FF data. if self.ff.data is None: logger.log(20, '~~ GATHERING INITIAL FF DATA ~~'.rjust(79, '~')) # Is opt.Optimizer.ff_lines used anymore? self.ff.export_ff() self.ff.data = calculate.main(self.args_ff) # Not 100% sure if this is necessary, but it certainly doesn't hurt. compare.correlate_energies(ref_data, self.ff.data) r_dict = compare.data_by_type(ref_data) c_dict = compare.data_by_type(self.ff.data) r_dict, c_dict = compare.trim_data(r_dict,c_dict) if self.ff.score is None: # Already zeroed reference and correlated the energies. self.ff.score = compare.compare_data(r_dict, c_dict) data_types = [] for typ in r_dict: data_types.append(typ) data_types.sort() logger.log(20, '~~ GRADIENT OPTIMIZATION ~~'.rjust(79, '~')) logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score)) opt.pretty_ff_results(self.ff, level=20) logger.log(20, '~~ CENTRAL DIFFERENTIATION ~~'.rjust(79, '~')) if restart: par_file = restart logger.log(20, ' -- Restarting gradient from central ' 'differentiation file {}.'.format(par_file)) else: # We need a file to hold the differentiated parameter data. par_files = glob.glob(os.path.join(self.direc, 'par_diff_???.txt')) if par_files: par_files.sort() most_recent_par_file = par_files[-1] most_recent_par_file = most_recent_par_file.split('/')[-1] most_recent_num = most_recent_par_file[9:12] num = int(most_recent_num) + 1 par_file = 'par_diff_{:03d}.txt'.format(num) else: par_file = 'par_diff_001.txt' logger.log(20, ' -- Generating central differentiation ' 'file {}.'.format(par_file)) f = open(os.path.join(self.direc, par_file), 'w') csv_writer = csv.writer(f) # Row 1 - Labels # Row 2 - Weights # Row 3 - Reference data values # Row 4 - Initial FF data values ## Deprecated -TR #csv_writer.writerow([x.lbl for x in ref_data]) #csv_writer.writerow([x.wht for x in ref_data]) #csv_writer.writerow([x.val for x in ref_data]) #csv_writer.writerow([x.val for x in self.ff.data]) writerows = [[],[],[],[]] for data_type in data_types: writerows[0].extend([x.lbl for x in r_dict[data_type]]) writerows[1].extend([x.wht for x in r_dict[data_type]]) writerows[2].extend([x.val for x in r_dict[data_type]]) writerows[3].extend([x.val for x in c_dict[data_type]]) for row in writerows: csv_writer.writerow(row) logger.log(20, '~~ DIFFERENTIATING PARAMETERS ~~'.rjust(79, '~')) # Save many FFs, each with their own parameter sets. ffs = opt.differentiate_ff(self.ff) logger.log(20, '~~ SCORING DIFFERENTIATED PARAMETERS ~~'.rjust( 79, '~')) for ff in ffs: ff.export_ff(lines=self.ff.lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) # Deprecated #ff.score = compare.compare_data(ref_data, data) c_data = compare.data_by_type(data) r_dict, c_data = compare.trim_data(r_dict,c_data) ff.score = compare.compare_data(r_dict, c_data) opt.pretty_ff_results(ff) # Write the data rather than storing it in memory. For large # parameter sets, this could consume GBs of memory otherwise! #csv_writer.writerow([x.val for x in data]) row = [] for data_type in data_types: row.extend([x.val for x in c_data[data_type]]) csv_writer.writerow(row) f.close() # Make sure we have derivative information. 
Used for NR. # # The derivatives are useful for checking up on the progress of the # optimization and for deciding which parameters to use in a # subsequent simplex optimization. # # Still need a way to do this with the resatrt file. opt.param_derivs(self.ff, ffs) # Calculate the Jacobian, residual vector, matrix A and vector b. # These aren't needed if you're only doing Newton-Raphson. if self.do_lstsq or self.do_lagrange or self.do_levenberg or \ self.do_svd: logger.log(20, '~~ JACOBIAN AND RESIDUAL VECTOR ~~'.rjust(79, '~')) # Setup the residual vector. # Deprecated - TR #num_d = len(ref_data) num_d = 0 for datatype in r_dict: num_d += len(r_dict[datatype]) resid = np.empty((num_d, 1), dtype=float) # Deprecated - TR #for i in xrange(0, num_d): # resid[i, 0] = ref_data[i].wht * \ # (ref_data[i].val - self.ff.data[i].val) count = 0 for data_type in data_types: for r,c in zip(r_dict[data_type],c_dict[data_type]): resid[count, 0] = r.wht * (r.val - c.val) count += 1 # logger.log(5, 'RESIDUAL VECTOR:\n{}'.format(resid)) logger.log( 20, ' -- Formed {} residual vector.'.format(resid.shape)) # Setup the Jacobian. num_p = len(self.ff.params) # Maybe should be a part of the Jacobian function. jacob = np.empty((num_d, num_p), dtype=float) jacob = return_jacobian(jacob, os.path.join(self.direc, par_file)) # logger.log(5, 'JACOBIAN:\n{}'.format(jacob)) logger.log(20, ' -- Formed {} Jacobian.'.format(jacob.shape)) ma = jacob.T.dot(jacob) vb = jacob.T.dot(resid) # We need these for most optimization methods. logger.log(5, ' MATRIX A AND VECTOR B '.center(79, '-')) # logger.log(5, 'A:\n{}'.format(ma)) # logger.log(5, 'b:\n{}'.format(vb)) # Start coming up with new parameter sets. if self.do_newton and not restart: logger.log(20, '~~ NEWTON-RAPHSON ~~'.rjust(79, '~')) # Moved the derivative section outside of here. changes = do_newton(self.ff.params, radii=self.newton_radii, cutoffs=self.newton_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lstsq: logger.log(20, '~~ LEAST SQUARES ~~'.rjust(79, '~')) changes = do_lstsq(ma, vb, radii=self.lstsq_radii, cutoffs=self.lstsq_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_lagrange: logger.log(20, '~~ LAGRANGE ~~'.rjust(79, '~')) for factor in sorted(self.lagrange_factors): changes = do_lagrange(ma, vb, factor, radii=self.lagrange_radii, cutoffs=self.lagrange_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_levenberg: logger.log(20, '~~ LEVENBERG ~~'.rjust(79, '~')) for factor in sorted(self.levenberg_factors): changes = do_levenberg(ma, vb, factor, radii=self.levenberg_radii, cutoffs=self.levenberg_cutoffs) cleanup(self.new_ffs, self.ff, changes) if self.do_svd: logger.log(20, '~~ SINGULAR VALUE DECOMPOSITION ~~'.rjust(79, '~')) # J = U . s . VT mu, vs, mvt = return_svd(jacob) logger.log(1, '>>> mu.shape: {}'.format(mu.shape)) logger.log(1, '>>> vs.shape: {}'.format(vs.shape)) logger.log(1, '>>> mvt.shape: {}'.format(mvt.shape)) logger.log(1, '>>> vb.shape: {}'.format(vb.shape)) if self.svd_factors: changes = do_svd_w_thresholds(mu, vs, mvt, resid, self.svd_factors, radii=self.svd_radii, cutoffs=self.svd_cutoffs) else: changes = do_svd_wo_thresholds(mu, vs, mvt, resid, radii=self.svd_radii, cutoffs=self.svd_cutoffs) cleanup(self.new_ffs, self.ff, changes) # Report how many trial FFs were generated. logger.log(20, ' -- Generated {} trial force field(s).'.format( len(self.new_ffs))) # If there are any trials, test them. 
if self.new_ffs: logger.log(20, '~~ EVALUATING TRIAL FF(S) ~~'.rjust(79, '~')) for ff in self.new_ffs: data = opt.cal_ff(ff, self.args_ff, parent_ff=self.ff) # Shouldn't need to zero anymore. # Deprecated #ff.score = compare.compare_data(ref_data, data) c_data = compare.data_by_type(data) r_dict, c_data = compare.trim_data(r_dict,c_data) ff.score = compare.compare_data(r_dict, c_data) opt.pretty_ff_results(ff) self.new_ffs = sorted( self.new_ffs, key=lambda x: x.score) # Check for improvement. if self.new_ffs[0].score < self.ff.score: ff = self.new_ffs[0] logger.log( 20, '~~ GRADIENT FINISHED WITH IMPROVEMENTS ~~'.rjust( 79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(ff, level=20) # Copy parameter derivatives from original FF to save time in # case we move onto simplex immediately after this. copy_derivs(self.ff, ff) else: ff = self.ff else: ff = self.ff return ff
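# --- Hedged sketch (not part of the original code): an SVD-based step like the
# do_svd branch above, which decomposes the Jacobian itself (J = U s V^T) and works
# from the residual vector. do_svd_w_thresholds/do_svd_wo_thresholds are not shown,
# so this is only an assumed reading: zero the small singular values, then apply
# the resulting pseudo-inverse to the residual.
import numpy as np

def svd_parameter_change(jacob, resid, threshold=0.0):
    u, s, vt = np.linalg.svd(jacob, full_matrices=False)
    inv_s = np.zeros_like(s)
    keep = s > threshold
    inv_s[keep] = 1.0 / s[keep]   # pseudo-inverse of the retained singular values
    # dp = V diag(1/s) U^T r
    return vt.T.dot(np.diag(inv_s)).dot(u.T).dot(resid)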
def run(self, r_data=None): """ Once all attributes are setup as you so desire, run this method to optimize the parameters. Returns ------- `datatypes.FF` (or subclass) Contains the best parameters. """ if r_data is None: r_data = opt.return_ref_data(self.args_ref) if self.ff.score is None: logger.log(20, '~~ CALCULATING INITIAL FF SCORE ~~'.rjust(79, '~')) self.ff.export_ff() # Could store data on self.ff.data if we wanted. Not necessary for # simplex. If simplex yielded no improvements, it would return this # FF, and then we might want the data such taht we don't have to # recalculate it in gradient. Let's hope simplex generally yields # improvements. data = calculate.main(self.args_ff) self.ff.score = compare.compare_data(r_data, data) else: logger.log(20, ' -- Reused existing score and data for initial FF.') logger.log(20, '~~ SIMPLEX OPTIMIZATION ~~'.rjust(79, '~')) logger.log(20, 'INIT FF SCORE: {}'.format(self.ff.score)) opt.pretty_ff_results(self.ff, level=20) if self.max_params and len(self.ff.params) > self.max_params: logger.log(20, ' -- More parameters than the maximum allowed.') logger.log(5, 'CURRENT PARAMS: {}'.format(len(self.ff.params))) logger.log(5, 'MAX PARAMS: {}'.format(self.max_params)) # Here we select the parameters that have the lowest 2nd # derivatives. # THIS IS SCHEDULED FOR CHANGING. THIS IS ACTUALLY NOT A GOOD # CRITERION FOR PARAMETER SELECTION. if self.ff.params[0].d1: logger.log(15, ' -- Reusing existing parameter derivatives.') # Differentiate all parameters forward. Yes, I know this is # counter-intuitive because we are going to only use subset of # the forward differentiated FFs. However, this is very # computationally inexpensive because we're not scoring them # now. We will remove the forward differentiated FFs we don't # want before scoring. ffs = opt.differentiate_ff(self.ff, central=False) else: logger.log(15, ' -- Calculating new parameter derivatives.') # Do central differentiation so we can calculate derivatives. # Another option would be to write code to determine # derivatives only from forward differentiation. ffs = opt.differentiate_ff(self.ff, central=True) # We have to score to get the derivatives. for ff in ffs: ff.export_ff(lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(ff) # Add the derivatives to your original FF. opt.param_derivs(self.ff, ffs) # Only keep the forward differentiated FFs. ffs = opt.extract_forward(ffs) logger.log(5, ' -- Keeping {} forward differentiated ' 'FFs.'.format(len(ffs))) # This sorts the parameters based upon their 2nd derivative. # It keeps the ones with lowest 2nd derivatives. # SCHEDULED FOR CHANGES. NOT A GOOD SORTING CRITERION. params = select_simp_params_on_derivs( self.ff.params, max_params=self.max_params) # From the entire list of forward differentiated FFs, pick # out the ones that have the lowest 2nd derivatives. self.new_ffs = opt.extract_ff_by_params(ffs, params) logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs))) # Reduce number of parameters. # Will need an option that's not MM3* specific in the future. ff_rows = [x.mm3_row for x in params] ff_cols = [x.mm3_col for x in params] for ff in self.new_ffs: new_params = [] for param in ff.params: if param.mm3_row in ff_rows and param.mm3_col in ff_cols: new_params.append(param) ff.params = new_params # Make a copy of your original FF that has less parameters. 
ff_copy = copy.deepcopy(self.ff) new_params = [] for param in ff.params: if param.mm3_row in ff_rows and param.mm3_col in ff_cols: new_params.append(param) ff_copy.params = new_params else: # In this case it's simple. Just forward differentiate each # parameter. self.new_ffs = opt.differentiate_ff(self.ff, central=False) logger.log(1, '>>> len(self.new_ffs): {}'.format(len(self.new_ffs))) # Still make that FF copy. ff_copy = copy.deepcopy(self.ff) # Double check and make sure they're all scored. for ff in self.new_ffs: if ff.score is None: ff.export_ff(lines=self.ff_lines) logger.log(20, ' -- Calculating {}.'.format(ff)) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(ff) # Add your copy of the orignal to FF to the forward differentiated FFs. self.new_ffs = sorted(self.new_ffs + [ff_copy], key=lambda x: x.score) # Allow 3 cycles w/o change for each parameter present. Remember that # the initial FF was added here, hence the minus one. self._max_cycles_wo_change = 3 * (len(self.new_ffs) - 1) wrapper = textwrap.TextWrapper(width=79) # Shows all FFs parameters. opt.pretty_ff_params(self.new_ffs) # Start the simplex cycles. current_cycle = 0 cycles_wo_change = 0 while current_cycle < self.max_cycles \ and cycles_wo_change < self._max_cycles_wo_change: current_cycle += 1 last_best = self.new_ffs[0].score best_ff = self.new_ffs[0] logger.log(20, '~~ START SIMPLEX CYCLE {} ~~'.format( current_cycle).rjust(79, '~')) logger.log(20, 'ORDERED FF SCORES:') logger.log(20, wrapper.fill('{}'.format( ' '.join('{:15.4f}'.format(x.score) for x in self.new_ffs)))) # !!! FOR TESTING !!! # Write the best and worst FFs to some other directory. Then # write the worst FF to optimization working directory. Then # raise opt.OptError. The worst FF should be overwritten by # the best FF afterwards. # if current_cycle == 5: # self.new_ffs[-1].export_ff( # path='ref_methanol_flds/mm3_worst.fld', # lines=self.ff.lines) # self.new_ffs[0].export_ff( # path='ref_methanol_flds/mm3_best.fld', # lines=self.ff.lines) # self.new_ffs[-1].export_ff( # path='ref_methanol/mm3.fld', # lines=self.ff.lines) # raise opt.OptError # !!! END TESTING !!! inv_ff = self.ff.__class__() if self.do_weighted_reflection: inv_ff.method = 'WEIGHTED INVERSION' else: inv_ff.method = 'INVERSION' inv_ff.params = copy.deepcopy(best_ff.params) ref_ff = self.ff.__class__() ref_ff.method = 'REFLECTION' ref_ff.params = copy.deepcopy(best_ff.params) # Need score difference sum for weighted inversion. # Calculate this value before going into loop. if self.do_weighted_reflection: # If zero, should break. score_diff_sum = sum([x.score - self.new_ffs[-1].score for x in self.new_ffs[:-1]]) if score_diff_sum == 0.: logger.warning( 'No difference between force field scores. ' 'Exiting simplex.') # We want to raise opt.OptError such that # opt.catch_run_errors will write the best FF obtained thus # far. raise opt.OptError( 'No difference between force field scores. ' 'Exiting simplex.') for i in xrange(0, len(best_ff.params)): if self.do_weighted_reflection: inv_val = ( sum([x.params[i].value * (x.score - self.new_ffs[-1].score) for x in self.new_ffs[:-1]]) / score_diff_sum) else: inv_val = ( sum([x.params[i].value for x in self.new_ffs[:-1]]) / len(self.new_ffs[:-1])) inv_ff.params[i].value = inv_val ref_ff.params[i].value = ( 2 * inv_val - self.new_ffs[-1].params[i].value) # The inversion point does not need to be scored. # Calculate score for reflected parameters. 
self.ff.export_ff(self.ff.path, params=ref_ff.params) data = calculate.main(self.args_ff) ref_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(ref_ff) if ref_ff.score < self.new_ffs[0].score: logger.log(20, '~~ ATTEMPTING EXPANSION ~~'.rjust(79, '~')) exp_ff = self.ff.__class__() exp_ff.method = 'EXPANSION' exp_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(self.new_ffs[0].params)): exp_ff.params[i].value = ( 3 * inv_ff.params[i].value - 2 * self.new_ffs[-1].params[i].value) self.ff.export_ff(self.ff.path, exp_ff.params) data = calculate.main(self.args_ff) exp_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(exp_ff) if exp_ff.score < ref_ff.score: self.new_ffs[-1] = exp_ff logger.log( 20, ' -- Expansion succeeded. Keeping expanded ' 'parameters.') else: self.new_ffs[-1] = ref_ff logger.log( 20, ' -- Expansion failed. Keeping reflected parameters.') elif ref_ff.score < self.new_ffs[-2].score: logger.log(20, ' -- Keeping reflected parameters.') self.new_ffs[-1] = ref_ff else: logger.log(20, '~~ ATTEMPTING CONTRACTION ~~'.rjust(79, '~')) con_ff = self.ff.__class__() con_ff.method = 'CONTRACTION' con_ff.params = copy.deepcopy(best_ff.params) for i in xrange(0, len(best_ff.params)): if ref_ff.score > self.new_ffs[-1].score: con_val = ( (inv_ff.params[i].value + self.new_ffs[-1].params[i].value) / 2) else: con_val = ( (3 * inv_ff.params[i].value - self.new_ffs[-1].params[i].value) / 2) con_ff.params[i].value = con_val self.ff.export_ff(self.ff.path, params=con_ff.params) data = calculate.main(self.args_ff) con_ff.score = compare.compare_data(r_data, data) opt.pretty_ff_results(con_ff) # This change was made to reflect the 1998 Q2MM publication. # if con_ff.score < self.new_ffs[-1].score: if con_ff.score < self.new_ffs[-2].score: logger.log(20, ' -- Contraction succeeded.') self.new_ffs[-1] = con_ff elif self.do_massive_contraction: logger.log( 20, '~~ DOING MASSIVE CONTRACTION ~~'.rjust(79, '~')) for ff_num, ff in enumerate(self.new_ffs[1:]): for i in xrange(0, len(best_ff.params)): ff.params[i].value = ( (ff.params[i].value + self.new_ffs[0].params[i].value) / 2) self.ff.export_ff(self.ff.path, params=ff.params) data = calculate.main(self.args_ff) ff.score = compare.compare_data(r_data, data) ff.method += ' MC' opt.pretty_ff_results(ff) else: logger.log( 20, ' -- Contraction failed. Keeping parmaeters ' 'anyway.') self.new_ffs[-1] = con_ff self.new_ffs = sorted(self.new_ffs, key=lambda x: x.score) if self.new_ffs[0].score < last_best: cycles_wo_change = 0 else: cycles_wo_change += 1 logger.log(20, ' -- {} cycles without improvement out of {} ' 'allowed.'.format( cycles_wo_change, self._max_cycles_wo_change)) best_ff = self.new_ffs[0] logger.log(20, 'BEST:') opt.pretty_ff_results(self.new_ffs[0], level=20) logger.log(20, '~~ END SIMPLEX CYCLE {} ~~'.format( current_cycle).rjust(79, '~')) if best_ff.score < self.ff.score: logger.log(20, '~~ SIMPLEX FINISHED WITH IMPROVEMENTS ~~'.rjust( 79, '~')) best_ff = restore_simp_ff(best_ff, self.ff) else: logger.log(20, '~~ SIMPLEX FINISHED WITHOUT IMPROVEMENTS ~~'.rjust( 79, '~')) opt.pretty_ff_results(self.ff, level=20) opt.pretty_ff_results(best_ff, level=20) logger.log(20, ' -- Writing best force field from simplex.') best_ff.export_ff(best_ff.path) return best_ff
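# --- Hedged sketch (not part of the original code): the kind of selection the
# max_params branch above delegates to select_simp_params_on_derivs. The comments
# there flag second derivatives as a questionable criterion; this is only an assumed
# reading (keep the parameters with the smallest second derivatives), and the d2
# attribute name is an assumption.
def pick_simplex_params(params, max_params):
    """Return the max_params parameters with the lowest second derivatives."""
    return sorted(params, key=lambda p: p.d2)[:max_params]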
# ================================= # Calculate # ================================= """ This module performs the actual distinctiveness measure for each feature. The calculation can be based on relative or binary features, and it can work in several ways: by division or by subtraction, with or without applying a log transformation. The contrast parameter takes ["category", "group1", "group2"] as in the metadata table. """ separator = ";" contrast = ["group", "Racine", "contemporaries"] # example for roman20 #contrast = ["random", "two", "one"] # for splitting groups randomly logaddition = 0.1 # affects the log-based calculation. calculate.main(datafolder, dtmfolder, metadatafile, separator, contrast, logaddition, resultsfolder, segmentlength, featuretype, absolutefreqs, relativefreqs, binaryfreqs) # ================================= # Visualize # ================================= """ This module provides several plotting functions. "zetabarchart" shows the n words with the most extreme negative and positive scores. "typescatterplot" provides a scatterplot in which each dot is one feature. """ # This is for a horizontal barchart plotting Zeta and similar scores per feature. numfeatures = 20
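# --- Hedged sketch (not part of the original code): the kind of per-feature score
# calculate.main computes from the two contrast groups. For binary features, a
# Zeta-style score compares the proportion of each group's segments that contain
# the feature; the division and log variants below use the logaddition constant to
# avoid dividing by, or taking the log of, zero. Names are illustrative.
import math

def distinctiveness_scores(prop_group_one, prop_group_two, logaddition=0.1):
    """prop_group_*: share of a group's segments in which the feature occurs."""
    subtraction = prop_group_one - prop_group_two
    division = (prop_group_one + logaddition) / (prop_group_two + logaddition)
    log_division = math.log(prop_group_one + logaddition) - \
        math.log(prop_group_two + logaddition)
    return subtraction, division, log_division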
def run_loop_input(self, lines, score=None): lines_iterator = iter(lines) while True: try: line = next(lines_iterator) except StopIteration: return self.ff cols = line.split() if cols[0] == 'DIR': self.direc = cols[1] if cols[0] == 'FFLD': # Import FF data. if cols[1] == 'read': if cols[2] == 'mm3.fld': self.ff = datatypes.MM3( os.path.join(self.direc, cols[2])) if 'prm' in line: self.ff = datatypes.TinkerFF( os.path.join(self.direc, cols[2])) if 'frcmod' in line: self.ff = datatypes.AmberFF( os.path.join(self.direc, cols[2])) self.ff.import_ff() self.ff.method = 'READ' with open(os.path.join(self.direc, cols[2]), 'r') as f: self.ff.lines = f.readlines() # Export FF data. if cols[1] == 'write': self.ff.export_ff(os.path.join(self.direc, cols[2])) # Trim parameters. if cols[0] == 'PARM': logger.log(20, '~~ SELECTING PARAMETERS ~~'.rjust(79, '~')) self.ff.params = parameters.trim_params_by_file( self.ff.params, os.path.join(self.direc, cols[1])) if cols[0] == 'LOOP': # Read lines that will be looped over. inner_loop_lines = [] line = next(lines_iterator) while line.split()[0] != 'END': inner_loop_lines.append(line) line = next(lines_iterator) # Make loop object and populate attributes. loop = Loop() loop.convergence = float(cols[1]) loop.direc = self.direc loop.ff = self.ff loop.args_ff = self.args_ff loop.args_ref = self.args_ref loop.ref_data = self.ref_data loop.loop_lines = inner_loop_lines # Log commands. pretty_loop_input(inner_loop_lines, name='OPTIMIZATION LOOP', score=self.ff.score) # Run inner loop. self.ff = loop.opt_loop() # Note: Probably want to update this to append the directory given # by the new DIR command. if cols[0] == 'RDAT': logger.log(20, '~~ CALCULATING REFERENCE DATA ~~'.rjust(79, '~')) if len(cols) > 1: self.args_ref = ' '.join(cols[1:]).split() self.ref_data = opt.return_ref_data(self.args_ref) if cols[0] == 'CDAT': logger.log(20, '~~ CALCULATING FF DATA ~~'.rjust(79, '~')) if len(cols) > 1: self.args_ff = ' '.join(cols[1:]).split() self.ff.data = calculate.main(self.args_ff) if cols[0] == 'COMP': # Deprecated # self.ff.score = compare.compare_data( # self.ref_data, self.ff.data) # if '-o' in cols: # compare.pretty_data_comp( # self.ref_data, # self.ff.data, # os.path.join(self.direc, cols[cols.index('-o') + 1])) # if '-p' in cols: # compare.pretty_data_comp( # self.ref_data, # self.ff.data, # doprint=True) output = False doprint = False r_dict = compare.data_by_type(self.ref_data) c_dict = compare.data_by_type(self.ff.data) r_dict, c_dict = compare.trim_data(r_dict, c_dict) if '-o' in cols: output = os.path.join(self.direc, cols[cols.index('-o') + 1]) if '-p' in cols: doprint = True self.ff.score = compare.compare_data(r_dict, c_dict, output=output, doprint=doprint) if cols[0] == 'GRAD': grad = gradient.Gradient(direc=self.direc, ff=self.ff, ff_lines=self.ff.lines, args_ff=self.args_ff) #### Should probably just write a function instead of looping #### this for every gradient method. This includes everything #### between the two lines of #. 
TR 20180112 ############################################################## for col in cols[1:]: if "lstsq" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_lstsq = True elif arg == False: grad.do_lstsq = False if 'radii' in arg: grad.lstsq_radii = [] radii_vals = re.search(r"\[(.+)\]", arg).group(1).split('/') if radii_vals == "None": grad.lstsq_radii = None else: for val in radii_vals: grad.lstsq_radii.append(float(val)) if 'cutoff' in arg: grad.lstsq_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if cutoff_vals == "None": grad.lstsq_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.lstsq_cutoff.append(float(val)) elif "newton" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_newton = True elif arg == False: grad.do_newton = False if 'radii' in arg: grad.newton_radii = [] radii_vals = re.search(r"\[(.+)\]", arg).group(1).split('/') if radii_vals == 'None': grad.newton_radii = None else: for val in radii_vals: grad.newton_radii.append(float(val)) if 'cutoff' in arg: grad.newton_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if cutoff_vals == 'None': grad.newton_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.newton_cutoff.append(float(val)) elif "levenberg" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_levenberg = True elif arg == False: grad.do_levenberg = False if 'radii' in arg: grad.levenberg_radii = [] radii_vals = re.search(r"\[(.+)\]", arg).group(1).split('/') if radii_vals == 'None': grad.levenberg_radii = None else: for val in radii_vals: grad.levenberg_radii.append(float(val)) if 'cutoff' in arg: grad.levenberg_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if cutoff_vals == 'None': grad.levenberg_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.levenberg_cutoff.append( float(val)) if 'factor' in arg: grad.levenberg_cutoff = [] factor_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if factor_vals == 'None': grad.levenberg_factor = None else: for val in factor_vals: grad.levenberg_factor.append( float(val)) elif "lagrange" in col: g_args = col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_lagrange = True elif arg == False: grad.do_lagrange = False if 'radii' in arg: grad.lagrange_radii = [] radii_vals = re.search(r"\[(.+)\]", arg).group(1).split('/') if radii_vals == 'None': grad.lagrange_radii = None else: for val in radii_vals: grad.lagrange_radii.append(float(val)) if 'cutoff' in arg: grad.lagrange_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if cutoff_vals == 'None': grad.lagrange_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.lagrange_cutoff.append(float(val)) if 'factor' in arg: grad.lagrange_factors = [] factor_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if factor_vals == 'None': grad.lagrange_factors = None else: for val in factor_vals: grad.lagrange_factors.append( float(val)) elif "svd" in col: g_args = 
col.split('=')[1].split(',') for arg in g_args: if arg == "True": grad.do_svd = True elif arg == False: grad.do_svd = False if 'radii' in arg: grad.svd_radii = [] radii_vals = re.search(r"\[(.+)\]", arg).group(1).split('/') if radii_vals == 'None': grad.svd_radii = None else: for val in radii_vals: grad.svd_radii.append(float(val)) if 'cutoff' in arg: grad.svd_cutoff = [] cutoff_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if cutoff_vals == 'None': grad.svd_cutoff = None else: if len(cutoff_vals) > 2 or \ len(cutoff_vals) < 2: raise Exception("Cutoff values must " \ "be between two numbers.") for val in cutoff_vals: grad.svd_cutoff.append(float(val)) if 'factor' in arg: grad.svd_cutoff = [] factor_vals = re.search( r"\[(.+)\]", arg).group(1).split('/') if factor_vals == 'None': grad.svd_factor = None else: for val in factor_vals: grad.svd_factor.append(float(val)) else: raise Exception("'{}' : Not Recognized".format(col)) ############################################################## self.ff = grad.run(ref_data=self.ref_data) if cols[0] == 'SIMP': simp = simplex.Simplex(direc=self.direc, ff=self.ff, ff_lines=self.ff.lines, args_ff=self.args_ff) for col in cols[1:]: if "max_params" in col: simp.max_params = col.split('=')[1] else: raise Exception("'{}' : Not Recognized".format(col)) self.ff = simp.run(r_data=self.ref_data) if cols[0] == 'WGHT': data_type = cols[1] co.WEIGHTS[data_type] = float(cols[2]) if cols[0] == 'STEP': param_type = cols[1] co.STEPS[param_type] = float(cols[2])
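# --- Hedged sketch (not part of the original code): one parsing helper that could
# replace the per-method blocks above, as the '#### Should probably just write a
# function' comment suggests. Note that the originals compare arg == False, which
# can never match the string "False"; the sketch compares strings throughout.
# Names are illustrative.
import re

def parse_gradient_option(option):
    """Parse e.g. 'lstsq=True,radii=[1.0/2.0],cutoff=[0.1/10.0]' into a dict."""
    method, _, arg_str = option.partition('=')
    parsed = {'do': None, 'radii': None, 'cutoff': None, 'factor': None}
    for arg in arg_str.split(','):
        if arg == 'True':
            parsed['do'] = True
        elif arg == 'False':
            parsed['do'] = False
        for key in ('radii', 'cutoff', 'factor'):
            if key in arg:
                vals = re.search(r"\[(.+)\]", arg).group(1).split('/')
                parsed[key] = None if vals == ['None'] else [float(v) for v in vals]
                if key == 'cutoff' and parsed[key] and len(parsed[key]) != 2:
                    raise Exception('Cutoff values must be exactly two numbers.')
    return method, parsed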
# Ways to execute a file: # run it from the command line, or in Python: execute the file directly # or import the file # both are .py files # when a file is run directly, it acts as a script # when a file is imported, it acts as a module # import my_module import calculate ret = calculate.main('1*9+3') print(ret) # A .py file # used as a script: it provides a feature on its own and handles the interaction itself # used as a module: its functionality can be imported and called, but it does not interact on its own # the __name__ variable in a file # when the file is run as a script, __name__ == '__main__' # when the file is imported as a module, __name__ == 'the name of the module'
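# --- Minimal illustration (not part of the original code) of the __name__ guard the
# comments above describe: the same file acts as a module when imported and as a
# script when run directly. The file name and the main() body are hypothetical.
# calculate.py
def main(expr):
    # Toy stand-in for the real calculation.
    return eval(expr)

if __name__ == '__main__':
    # Only runs when executed as a script, e.g. `python calculate.py`,
    # not when another file does `import calculate`.
    print(main('1*9+3'))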