def khan_calculator(ani_model, khan_network, numb_networks):
    """
    Return a calculator from a roitberg model.
    choices are %s
    """ % " ".join(models.keys())

    if khan_network is None:
        print("setting up Roitberg network")
        model_file = models[ani_model]

        wkdir = model_file.rsplit('/', 1)[0] + '/'

        data = np.loadtxt(model_file, dtype=str)
        cnstfile = wkdir + data[0]  # AEV parameters
        saefile = wkdir + data[1]   # Atomic shifts
        nnfdir = wkdir + data[2]    # network prefix
        Nn = int(data[3])           # Number of networks in the ensemble

        assert numb_networks <= Nn

        network_dir = nnfdir[:-5]
    else:
        print("setting up khan network")
        saefile = os.path.join(
            os.environ["KHAN"],
            "data",
            "sae_linfit.dat"
            #"sae_wb97x.dat"
        )
        network_dir = None

    ani_lib = os.path.join(os.environ["KHAN"], "gpu_featurizer", "ani_cpu.so")
    initialize_module(ani_lib)

    cp = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    tf_sess = tf.Session(config=cp)

    atomic_energies = read_sae_parameters(saefile)

    calculator = Calculator(
        tf_sess,
        atomic_energies,
        numb_networks,
        khan_saved_network=khan_network,
        roitberg_network=network_dir)

    return calculator
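# Hedged usage sketch (not part of the original source): how khan_calculator above
# might be invoked for a saved khan network. The paths below are placeholder
# assumptions; only the call signature comes from khan_calculator itself.
def _example_build_calculator():
    os.environ.setdefault("KHAN", "/path/to/khan")  # must contain data/ and gpu_featurizer/ (assumed layout)
    # ani_model is ignored when a khan_network directory is supplied
    return khan_calculator(ani_model=None,
                           khan_network="/path/to/saved_network",
                           numb_networks=1)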
def main():

    #avail_gpus = get_available_gpus()
    #print("Available GPUs:", avail_gpus)
    print('os.environ:', os.environ)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:  # must be at start to reserve GPUs

        parser = argparse.ArgumentParser(
            description="Run ANI1 neural net training.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser.add_argument(
            '--ani_lib',
            required=True,
            help="Location of the shared object for GPU featurization")
        parser.add_argument('--fitted',
                            default=False,
                            action='store_true',
                            help="Whether or not to use fitted energy corrections")
        parser.add_argument('--add_ffdata',
                            default=True,
                            action='store_true',
                            help="Whether or not to add the forcefield data")
        parser.add_argument('--gpus', default='4', help="Number of GPUs to use")
        parser.add_argument(
            '--cpus',
            default='1',
            help="Number of CPUs to use (GPUs override this if > 0)")
        parser.add_argument(
            '--start_batch_size',
            default='64',
            help="How many training points to consider before calculating each gradient")
        parser.add_argument(
            '--max_local_epoch_count',
            default='50',
            help="How many epochs to try each learning rate before reducing it")
        parser.add_argument('--dataset_index',
                            default='0',
                            help="Index of training set to use")
        parser.add_argument('--testset_index',
                            default='0',
                            help="Index of test set to use")
        parser.add_argument(
            '--fit_charges',
            default=False,
            action='store_true',
            help="Whether or not to add fitted charge energies")
        parser.add_argument('--work-dir',
                            default='~/work',
                            help="location where work data is dumped")
        parser.add_argument('--train-dir',
                            default='/home/yzhao/ANI-1_release',
                            help="location where training data is located")
        parser.add_argument('--restart',
                            default=False,
                            action='store_true',
                            help="Whether to restart from the save dir")
        parser.add_argument(
            '--train_size',
            default='0.5',
            help="how much of the dataset to use for gradient evaluations")
        parser.add_argument(
            '--test_size',
            default='0.5',
            help="how much of the dataset to use for testing the energies")

        args = parser.parse_args()

        print("Arguments", args)

        lib_path = os.path.abspath(args.ani_lib)
        print("Loading custom kernel from", lib_path)
        initialize_module(lib_path)

        ANI_TRAIN_DIR = args.train_dir
        ANI_WORK_DIR = args.work_dir

        GRAPH_DB_TRAIN_DIR = '/nfs/working/scidev/stevenso/learning/khan/graphdb_xyz/xyz/train'
        GRAPH_DB_TEST_DIR = '/nfs/working/scidev/stevenso/learning/khan/graphdb_xyz/xyz/test/'

        train_size = float(args.train_size)
        test_size = float(args.test_size)

        CALIBRATION_FILE_TRAIN = os.path.join(ANI_TRAIN_DIR, "results_QM_M06-2X.txt")
        CALIBRATION_FILE_TEST = os.path.join(ANI_TRAIN_DIR, "gdb_11_cal.txt")
        ROTAMER_TRAIN_DIR = [
            os.path.join(ANI_TRAIN_DIR, "rotamers/train"),
            os.path.join(ANI_TRAIN_DIR, "rotamers/test")
        ]
        ROTAMER_TEST_DIR = os.path.join(ANI_TRAIN_DIR, "rotamers/test")
        CHARGED_ROTAMER_TEST_DIR = os.path.join(ANI_TRAIN_DIR, "charged_rotamers_2")
        CCSDT_ROTAMER_TEST_DIR = os.path.join(ANI_TRAIN_DIR, "ccsdt_dataset")

        save_dir = os.path.join(ANI_WORK_DIR, "save")
        if os.path.isdir(save_dir) and not args.restart:
            print('save_dir', save_dir, 'exists and this is not a restart job')
            exit()

        batch_size = int(args.start_batch_size)
        use_fitted = args.fitted
        add_ffdata = args.add_ffdata

        data_loader = DataLoader(use_fitted)

        print("------------Load evaluation data--------------")

        pickle_files = [
            'eval_new_graphdb.pickle', 'eval_data_old_fftest.pickle',
            'eval_data_graphdb.pickle', 'rotamer_gdb_opt.pickle'
        ]
        pickle_file = pickle_files[int(args.testset_index)]
        if os.path.isfile(pickle_file):
            print('Loading pickle from', pickle_file)
            rd_gdb11, rd_ffneutral_mo62x, ffneutral_groups_mo62x, \
                rd_ffneutral_ccsdt, ffneutral_groups_ccsdt, \
                rd_ffcharged_mo62x, ffcharged_groups_mo62x = pickle.load(
                    open(pickle_file, "rb"))

            # backwards compatibility for pickle files: add all_grads = None
            rd_gdb11.all_grads = None
            rd_ffneutral_mo62x.all_grads = None
            rd_ffneutral_ccsdt.all_grads = None
            rd_ffcharged_mo62x.all_grads = None

            #rd_gdb11, rd_ffneutral_mo62x, ffneutral_groups_mo62x, rd_ffneutral_ccsdt, ffneutral_groups_ccsdt, rd_ffcharged_mo62x, ffcharged_groups_mo62x, rd_gdb_opt, gdb_opt_groups = pickle.load( open(pickle_file, "rb") )

            pickle.dump((rd_gdb11, rd_ffneutral_mo62x, ffneutral_groups_mo62x,
                         rd_ffneutral_ccsdt, ffneutral_groups_ccsdt,
                         rd_ffcharged_mo62x, ffcharged_groups_mo62x),
                        open(pickle_file, "wb"))
        else:
            print('gdb11')
            xs, ys = data_loader.load_gdb11(ANI_TRAIN_DIR, CALIBRATION_FILE_TEST)
            rd_gdb11 = RawDataset(xs, ys)

            xs, ys, ffneutral_groups_mo62x = data_loader.load_ff(GRAPH_DB_TEST_DIR)
            rd_ffneutral_mo62x = RawDataset(xs, ys)

            xs, ys, ffneutral_groups_ccsdt = data_loader.load_ff(CCSDT_ROTAMER_TEST_DIR)
            rd_ffneutral_ccsdt = RawDataset(xs, ys)

            xs, ys, ffcharged_groups_mo62x = data_loader.load_ff(CHARGED_ROTAMER_TEST_DIR)
            rd_ffcharged_mo62x = RawDataset(xs, ys)

            xs, ys, gdb_opt_groups = data_loader.load_ff('haoyu_opt/xyz/')
            rd_gdb_opt = RawDataset(xs, ys)

            print('Pickling data...')
            pickle.dump((rd_gdb11, rd_ffneutral_mo62x, ffneutral_groups_mo62x,
                         rd_ffneutral_ccsdt, ffneutral_groups_ccsdt,
                         rd_ffcharged_mo62x, ffcharged_groups_mo62x,
                         rd_gdb_opt, gdb_opt_groups),
                        open(pickle_file, "wb"))

        eval_names = [
            "Neutral Rotamers", "Neutral Rotamers CCSDT", "Charged Rotamers",
            "GDB Opt"
        ]
        #eval_groups = [ffneutral_groups_mo62x, ffneutral_groups_ccsdt, ffcharged_groups_mo62x, gdb_opt_groups]
        #eval_datasets = [rd_ffneutral_mo62x, rd_ffneutral_ccsdt, rd_ffcharged_mo62x, rd_gdb_opt]
        eval_names = [
            "Neutral Rotamers", "Neutral Rotamers CCSDT", "Charged Rotamers"
        ]
        eval_groups = [
            ffneutral_groups_mo62x, ffneutral_groups_ccsdt, ffcharged_groups_mo62x
        ]
        eval_datasets = [
            rd_ffneutral_mo62x, rd_ffneutral_ccsdt, rd_ffcharged_mo62x
        ]

        # This training code implements cross-validation based training, whereby we determine convergence on a given
        # epoch depending on the cross-validation error for a given validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If after more than
        # max_local_epoch_count number of epochs have been run and no progress has been made, we decrease the learning
        # rate and restore the best found parameters.
        max_local_epoch_count = int(args.max_local_epoch_count)

        n_gpus = int(args.gpus)  # min( int(args.gpus), len(avail_gpus) )
        n_cpus = min(int(args.cpus), os.cpu_count())
        if n_gpus > 0:
            towers = ["/gpu:" + str(i) for i in range(n_gpus)]
        else:
            towers = ["/cpu:" + str(i) for i in range(n_cpus)]
        print("towers:", towers)

        #layer_sizes=(128, 128, 64, 1)  # original
        layer_sizes = (512, 256, 128, 1)
        #layer_sizes=(256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)  # bigNN
        #layer_sizes=tuple( 20*[128] + [1] )
        #layer_sizes=(1,)  # linear
        print('layer_sizes:', layer_sizes)
        n_weights = sum([
            layer_sizes[i] * layer_sizes[i + 1]
            for i in range(len(layer_sizes) - 1)
        ])
        print('n_weights:', n_weights)

        print("------------Load training data--------------")

        pickle_files = [
            "gdb8_fftrain_fftest_xy.pickle", "gdb8_graphdb_xy.pickle",
            "gdb8_xy.pickle", "gdb7_xy.pickle", "gdb6_ffdata_xy.pickle",
            "gdb3_xy.pickle", "gdb8_graphdb_xy_differ3.pickle"
        ]
        pickle_file = pickle_files[int(args.dataset_index)]
        if os.path.isfile(pickle_file):
            print('Loading pickle from', pickle_file)
            Xs, ys = pickle.load(open(pickle_file, "rb"))
        else:
            ff_train_dirs = ROTAMER_TRAIN_DIR + [GRAPH_DB_TRAIN_DIR]
            Xs, ys = data_loader.load_gdb8(ANI_TRAIN_DIR, CALIBRATION_FILE_TRAIN,
                                           ff_train_dirs)
            print('Pickling data...')
            pickle.dump((Xs, ys), open(pickle_file, "wb"))

        print("------------Initializing model--------------")

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            Xs, ys, train_size=train_size, test_size=test_size)
        # stratify by UTT would be good to try here
        rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test, y_test)
        print('n_train =', len(y_train), 'n_test =', len(y_test))

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            layer_sizes=layer_sizes,
            fit_charges=args.fit_charges,
            precision=tf.float32  # train in single precision (james you may want to change this later)
            #precision=tf.float64
        )

        if os.path.exists(save_dir):
            print("Restoring existing model from", save_dir)
            trainer.load_numpy(save_dir + '/best.npz')
        else:
            # initialize new random parameters
            trainer.initialize()

        for name, ff_data, ff_groups in zip(eval_names, eval_datasets, eval_groups):
            print(name,
                  "abs/rel rmses: {0:.6f} kcal/mol | ".format(
                      trainer.eval_abs_rmse(ff_data)) +
                  "{0:.6f} kcal/mol".format(
                      trainer.eval_eh_rmse(ff_data, ff_groups)))

        print("------------Starting Training--------------")

        trainer.train(save_dir, rd_train, rd_test, rd_gdb11, eval_names,
                      eval_datasets, eval_groups, batch_size,
                      max_local_epoch_count)
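# Hedged sketch (not part of the original source): the n_weights tally in the training
# main() above counts only the hidden-layer weight matrices. A fuller per-atom-type
# parameter count that also includes the featurization input width and the bias vectors
# might look like this; n_feat (the AEV length) is an assumed argument.
def count_parameters_sketch(layer_sizes, n_feat):
    sizes = (n_feat,) + tuple(layer_sizes)
    weights = sum(sizes[i] * sizes[i + 1] for i in range(len(sizes) - 1))
    biases = sum(sizes[1:])
    return weights + biases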
def main():

    args = parse_args(sys.argv)

    lib_path = os.path.abspath(args.ani_lib)
    initialize_module(lib_path)

    save_dir = os.path.join(args.work_dir, "save")

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        layer_sizes = (128, 128, 64, 1)
        if args.deep_network:
            layer_sizes = (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)
        towers = ["/cpu:0"]
        print("start with layers", layer_sizes)

        trainer = TrainerMultiTower(
            sess,
            towers,
            layer_sizes=layer_sizes,
            fit_charges=args.fit_charges,
            gaussian_activation=args.gaussian_activation)

        trainer.load(save_dir)

        s = client_server.connect_socket(args.host, args.port, server=True)
        if args.debug:
            print("Server listening on port %d" % args.port)

        while True:
            if args.debug:
                print("awaiting connection...")
            conn, addr = s.accept()
            if args.debug:
                print("Connection established...")

            while True:
                rcv_data = client_server.recieve(conn)
                print("received data", rcv_data)
                if rcv_data:
                    X = json.loads(rcv_data).get('X')
                    X_np = np.array(X, dtype=np.float32)
                    rd = RawDataset([X_np], [0.0])

                    # should I go back to total energy?
                    energy = float(trainer.predict(rd)[0])
                    self_interaction = sum(
                        data_utils.selfIxnNrgWB97X_631gdp[example[0]]
                        for example in X)
                    energy += self_interaction

                    gradient = list(trainer.coordinate_gradients(rd))[0]
                    natoms, ndim = gradient.shape
                    gradient = gradient.reshape(natoms * ndim)

                    if args.fdiff_grad:
                        fd_gradient = fdiff_grad(X_np, trainer)
                        dg = gradient - fd_gradient
                        grms = np.sqrt(sum(dg[:]**2.0) / (natoms * ndim))
                        norm_g = np.sqrt(np.dot(gradient, gradient))
                        norm_fd = np.sqrt(np.dot(fd_gradient, fd_gradient))
                        dot = np.dot(gradient, fd_gradient) / (norm_fd * norm_g)
                        gradient[:] = fd_gradient[:]
                        print("RMS gradient fdiff/analytic %.4e" % grms)
                        print("Gradient dot product %.4f" % dot)

                    # convert gradient from hartree/angstrom to hartree/bohr
                    # and to jsonable format
                    gradient = [float(g) * BOHR for g in gradient]

                    print("sending gradient")
                    print(gradient)

                    send_data = json.dumps({
                        "energy": energy,
                        "gradient": gradient
                    })

                    print("sending response...")
                    client_server.send(conn, send_data)
                else:
                    break
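# Hedged sketch (not part of the original source): one way the fdiff_grad helper used
# above could be written, as a central finite difference over the Cartesian columns of
# X. The step size and the (element, x, y, z) row layout are assumptions inferred from
# how X_np is built in main(); the repo's actual fdiff_grad may differ.
def fdiff_grad_sketch(X, trainer, step=1.0e-3):
    """Finite-difference gradient in hartree/angstrom, flattened to shape (natoms*3,)."""
    natoms = X.shape[0]
    grad = np.zeros(natoms * 3)
    for i in range(natoms):
        for j in range(3):  # column 0 of X holds the element type, columns 1-3 the coordinates
            Xp, Xm = np.array(X), np.array(X)
            Xp[i, j + 1] += step
            Xm[i, j + 1] -= step
            e_plus = float(trainer.predict(RawDataset([Xp], [0.0]))[0])
            e_minus = float(trainer.predict(RawDataset([Xm], [0.0]))[0])
            grad[i * 3 + j] = (e_plus - e_minus) / (2.0 * step)
    return grad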
def main():

    args = parse_args(sys.argv)

    lib_path = os.path.abspath(args.ani_lib)
    initialize_module(lib_path)

    save_file = os.path.join(args.save_dir, "save_file.npz")
    if not os.path.exists(save_file):
        raise IOError("Saved NN numpy file does not exist")

    _, _, X_test, y_test, X_big, y_big = load_reactivity_data(
        args.reactivity_dir, 1.0)
    small_reactions, big_reactions = read_all_reactions(args.reactivity_dir)

    rd_test = RawDataset(X_test, y_test)
    rd_big = RawDataset(X_big, y_big)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        towers = ["/cpu:0"]
        layers = (128, 128, 64, 1)
        if args.deep_network:
            layers = (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)

        activation_fn = activations.get_fn_by_name(args.activation_function)

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=tf.float64,
            layer_sizes=layers,
            activation_fn=activation_fn,
            fit_charges=args.fit_charges,
        )

        trainer.load_numpy(save_file)

        if args.analyze_reaction_errors:

            if not os.path.exists("small_reactions_comparison"):
                os.mkdir("small_reactions_comparison")
            if not os.path.exists("big_reactions_comparison"):
                os.mkdir("big_reactions_comparison")

            for dataname, data in (("small_reactions", small_reactions),
                                   ("big_reactions", big_reactions)):

                # get reactant, TS, product
                Xr, Er = [], []
                Xts, Ets = [], []
                Xp, Ep = [], []
                for name in data:
                    Xs, Es = data[name]

                    if args.write_comparison_data:
                        # make a directory HERE
                        directory = dataname + "_comparison"
                        write_reaction_data(os.path.join(directory, name), Xs, Es, trainer)

                    Xr.append(Xs[0])
                    Er.append(Es[0])
                    Xp.append(Xs[-1])
                    Ep.append(Es[-1])

                    # ts is highest energy point along path
                    emax = max(Es)
                    idx = Es.index(emax)
                    Xts.append(Xs[idx])
                    Ets.append(Es[idx])

                # make datasets
                rd_r = RawDataset(Xr, Er)
                rd_p = RawDataset(Xp, Ep)
                rd_ts = RawDataset(Xts, Ets)
                Er = np.array(Er)
                Ep = np.array(Ep)
                Ets = np.array(Ets)

                # predict energies
                r_predictions = np.array(trainer.predict(rd_r))
                p_predictions = np.array(trainer.predict(rd_p))
                ts_predictions = np.array(trainer.predict(rd_ts))

                barriers = (Ets - Er) * KCAL
                reverse_barriers = (Ets - Ep) * KCAL
                predicted_barriers = (ts_predictions - r_predictions) * KCAL
                predicted_reverse_barriers = (ts_predictions - p_predictions) * KCAL

                rxn_e = (Ep - Er) * KCAL
                predicted_rxn_e = (p_predictions - r_predictions) * KCAL

                barrier_errors = barriers - predicted_barriers
                barrier_rmse = np.sqrt(
                    sum(barrier_errors[:]**2.0) / len(barrier_errors))

                reverse_barrier_errors = reverse_barriers - predicted_reverse_barriers
                reverse_barrier_rmse = np.sqrt(
                    sum(reverse_barrier_errors[:]**2.0) / len(reverse_barrier_errors))

                rxn_errors = rxn_e - predicted_rxn_e
                rxn_rmse = np.sqrt(sum(rxn_errors[:]**2.0) / len(rxn_errors))

                # barrier height plot
                bmu, bsigma = histogram(barrier_errors, "Barrier height errors")
                rbmu, rbsigma = histogram(reverse_barrier_errors,
                                          "Reverse Barrier height errors")
                rmu, rsigma = histogram(rxn_errors, "Reaction energy errors")

                plt.xlabel("Error (kcal/mol)")
                plt.title("Reaction energetic errors for %s" % dataname)
                plt.legend()
                #plt.scatter(barriers, predicted_barriers)
                #plt.scatter(rxn_e, predicted_rxn_e)
                plt.savefig("%s_barrier_height_errors.pdf" % dataname)
                plt.clf()

                print("errors for %s" % dataname)
                print("Barrier RMSE %.2f rxn RMSE %.2f" % (barrier_rmse, rxn_rmse))
                print("Reverse Barrier RMSE %.2f" % reverse_barrier_rmse)
                print("rxn mu %f sigma %f" % (rmu, rsigma))
                print("barrier mu %f sigma %f" % (bmu, bsigma))
                print("reverse barrier mu %f sigma %f" % (rbmu, rbsigma))

        # plot distribution of raw errors
        if args.analyze_raw_errors:
            # evaluate errors in predictions
            rxn_predictions = trainer.predict(rd_test)
            big_predictions = trainer.predict(rd_big)
            rxn_errors = np.array(rxn_predictions) - np.array(y_test)
            big_errors = np.array(big_predictions) - np.array(y_big)
            rxn_rmse = np.sqrt(sum(rxn_errors[:]**2.0) / len(rxn_errors))
            big_rmse = np.sqrt(sum(big_errors[:]**2.0) / len(big_errors))
            rxn_errors = rxn_errors * KCAL
            big_errors = big_errors * KCAL

            print("small rmse %.4f big rmse %.4f" % (rxn_rmse * KCAL, big_rmse * KCAL))

            smu, ssigma = histogram(rxn_errors,
                                    "Atomization energy errors for small systems")
            bmu, bsigma = histogram(big_errors,
                                    "Atomization energy errors for large systems")

            plt.xlabel("Error (kcal/mol)")
            plt.title("Atomization energy errors")
            plt.legend()
            plt.savefig("atomization_errors.pdf")
            plt.clf()

            print("small atomization mu %f sigma %f" % (smu, ssigma))
            print("big atomization mu %f sigma %f" % (bmu, bsigma))
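# Hedged sketch (not part of the original source): a histogram() helper consistent with
# how it is called above -- it adds a labelled error histogram to the current matplotlib
# figure and returns the sample mean and standard deviation. The bin count is an
# arbitrary assumption.
def histogram_sketch(errors, label, bins=50):
    errors = np.asarray(errors)
    mu, sigma = float(np.mean(errors)), float(np.std(errors))
    plt.hist(errors, bins=bins, alpha=0.5,
             label="%s (mu=%.2f, sigma=%.2f)" % (label, mu, sigma))
    return mu, sigma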
def main():

    parser = argparse.ArgumentParser(
        description="Run ANI1 neural net training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--ani-lib',
        required=True,
        help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted',
                        default=False,
                        action='store_true',
                        help="Whether to use fitted or self-ixn")
    parser.add_argument('--add_ffdata',
                        default=False,
                        action='store_true',
                        help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")
    parser.add_argument('--train_forces',
                        default=True,
                        help="If we train to the forces")
    parser.add_argument('--save-dir',
                        default='~/work',
                        help="location where save data is dumped")
    parser.add_argument('--train-dir',
                        default='~/ANI-1_release',
                        help="location where training data is located")

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    ANI_TRAIN_DIR = args.train_dir
    ANI_SAVE_DIR = args.save_dir

    save_dir = os.path.join(ANI_SAVE_DIR, "save")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    # todo: ensure disjunction in train_test_valid
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        all_Xs, all_Ys, test_size=0.25)
    # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test, y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    all_Xs_f, all_Ys_f, all_Fs_f = data_loader.load_gdb8_forces(ANI_TRAIN_DIR)
    # todo: figure out how to split this consistently later
    rd_train_forces = RawDataset(all_Xs_f, all_Ys_f, all_Fs_f)

    with tf.Session(config=config) as sess:
        # This training code implements cross-validation based training, whereby we determine convergence on a given
        # epoch depending on the cross-validation error for a given validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If after more than
        # max_local_epoch_count number of epochs have been run and no progress has been made, we decrease the learning
        # rate and restore the best found parameters.
        n_gpus = int(args.gpus)
        if n_gpus > 0:
            towers = ["/gpu:" + str(i) for i in range(n_gpus)]
        else:
            towers = [
                "/cpu:" + str(i) for i in range(multiprocessing.cpu_count())
            ]

        print("towers:", towers)

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=tf.float32,
            layer_sizes=(128, 128, 64, 1),
            # fit_charges=True,
        )

        # if os.path.exists(save_dir):
        #     print("Restoring existing model from", save_dir)
        #     trainer.load(save_dir)
        # else:
        trainer.initialize()  # initialize to random variables

        max_local_epoch_count = 10

        train_ops = [
            trainer.global_epoch_count,
            trainer.learning_rate,
            trainer.local_epoch_count,
            trainer.unordered_l2s,
            trainer.train_op,
        ]

        print("------------Starting Training--------------")

        start_time = time.time()

        train_forces = bool(int(args.train_forces))  # argparse hands this flag back as a string, so convert explicitly

        # training with forces
        while sess.run(trainer.learning_rate) > 5e-10:  # this is to deal with a numerical error, we technically train to 1e-9

            while sess.run(trainer.local_epoch_count) < max_local_epoch_count:

                start_time = time.time()

                # train to forces
                if train_forces:
                    train_results_forces = list(
                        trainer.feed_dataset(
                            rd_train_forces,
                            shuffle=True,
                            target_ops=[
                                trainer.train_op_forces,
                                trainer.tower_force_rmses
                            ],
                            batch_size=batch_size,
                            before_hooks=trainer.max_norm_ops))
                    print(train_results_forces, end=" | ")

                # train to energies
                train_results_energies = list(
                    trainer.feed_dataset(rd_train,
                                         shuffle=True,
                                         target_ops=train_ops,
                                         batch_size=batch_size,
                                         before_hooks=trainer.max_norm_ops))

                train_abs_rmse = np.sqrt(
                    np.mean(flatten_results(train_results_energies, pos=3))
                ) * HARTREE_TO_KCAL_PER_MOL

                test_abs_rmse = trainer.eval_abs_rmse(rd_test)
                gdb11_abs_rmse = trainer.eval_abs_rmse(rd_gdb11)

                print(time.time() - start_time, train_abs_rmse, test_abs_rmse,
                      gdb11_abs_rmse)

            print("==========Decreasing learning rate==========")
            sess.run(trainer.decr_learning_rate)
            sess.run(trainer.reset_local_epoch_count)
            trainer.load_best_params()

    return
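# Hedged sketch (not part of the original source): flatten_results, used in the loop
# above to turn the feed_dataset output into a flat array of per-molecule squared
# errors before the RMSE is taken. The exact nesting of the results (one entry per
# minibatch, with the pos-th element holding the unordered L2s) is an assumption, not
# confirmed API.
def flatten_results_sketch(results, pos):
    vals = []
    for batch in results:  # one entry per minibatch fed by feed_dataset
        vals.append(np.ravel(batch[pos]))
    return np.concatenate(vals)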
def main():

    parser = argparse.ArgumentParser(
        description="Run ANI1 neural net training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--ani_lib',
        required=True,
        help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted',
                        default=False,
                        action='store_true',
                        help="Whether to use fitted or self-ixn")
    parser.add_argument('--add_ffdata',
                        default=False,
                        action='store_true',
                        help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")
    parser.add_argument('--work-dir',
                        default='~/work',
                        help="location where work data is dumped")
    parser.add_argument('--train-dir',
                        default='~/ANI-1_release',
                        help="location where training data is located")

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    ANI_TRAIN_DIR = args.train_dir
    ANI_WORK_DIR = args.work_dir

    save_dir = os.path.join(ANI_WORK_DIR, "save")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        all_Xs, all_Ys, test_size=0.25)
    # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test, y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    with tf.Session(config=config) as sess:
        # This training code implements cross-validation based training, whereby we determine convergence on a given
        # epoch depending on the cross-validation error for a given validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If after more than
        # max_local_epoch_count number of epochs have been run and no progress has been made, we decrease the learning
        # rate and restore the best found parameters.

        # n_gpus = int(args.gpus)
        # if n_gpus > 0:
        #     towers = ["/gpu:"+str(i) for i in range(n_gpus)]
        # else:
        #     towers = ["/cpu:"+str(i) for i in range(multiprocessing.cpu_count())]
        # print("towers:", towers)

        with tf.variable_scope("james"):
            trainer_james = TrainerMultiTower(
                sess,
                towers=["/gpu:0"],
                layer_sizes=(128, 128, 64, 1),
                fit_charges=True,
            )

        with tf.variable_scope("yutong"):
            trainer_yutong = TrainerMultiTower(
                sess,
                towers=["/gpu:1"],
                layer_sizes=(128, 128, 64, 1),
                fit_charges=True,
            )

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        pool = ThreadPool(2)

        data = ((trainer_james, rd_train, rd_test),
                (trainer_yutong, rd_train, rd_test))

        for e in range(10):
            pool.map(run_one_epoch, data)

        # need to use saver across all
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        save_path = os.path.join(save_dir, "model.ckpt")
        saver.save(sess, save_path)
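# Hedged sketch (not part of the original source): a run_one_epoch consistent with how
# it is mapped over (trainer, rd_train, rd_test) tuples above. The target_ops and batch
# size are assumptions; only feed_dataset, max_norm_ops and eval_abs_rmse come from the
# TrainerMultiTower usage elsewhere in this file.
def run_one_epoch_sketch(packet):
    trainer, rd_train, rd_test = packet
    list(trainer.feed_dataset(
        rd_train,
        shuffle=True,
        target_ops=[trainer.train_op],
        batch_size=1024,
        before_hooks=trainer.max_norm_ops))
    return trainer.eval_abs_rmse(rd_test)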
def main():

    parser = argparse.ArgumentParser(
        description="Run ANI1 neural net training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--ani-lib', required=True,
                        help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted', default=False, action='store_true',
                        help="Whether to use fitted or self-ixn")
    parser.add_argument('--add-ffdata', default=False, action='store_true',
                        help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")
    parser.add_argument('--save-dir', default='~/work',
                        help="Location where save data is dumped. If the folder does not exist then it will be created.")
    parser.add_argument('--train-dir', default='~/ANI-1_release',
                        help="Location where training data is located")
    parser.add_argument(
        '--reactivity-dir',
        default=None,
        help='location of reactivity data'
    )
    parser.add_argument(
        '--reactivity-test-percent',
        default=0.25,
        type=float,
        help='percent of reactions to put in test set'
    )
    parser.add_argument(
        '--deep-network',
        action='store_true',
        help='Use James super deep network (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)'
    )
    parser.add_argument(
        '--fit-charges',
        action='store_true',
        help='fit charges'
    )
    parser.add_argument(
        '--activation-function',
        type=str,
        choices=activations.get_all_fn_names(),
        help='choice of activation function',
        default="celu"
    )
    parser.add_argument(
        '--convert-checkpoint',
        default=False,
        action='store_true',
        help='Convert a checkpoint file to a numpy file and exit'
    )
    parser.add_argument(
        '--precision',
        default='single',
        type=str,
        choices=PRECISION.keys(),
        help="Floating point precision of NN"
    )

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    ANI_TRAIN_DIR = args.train_dir
    ANI_SAVE_DIR = args.save_dir

    # save_dir = os.path.join(ANI_SAVE_DIR, "save")
    save_file = os.path.join(ANI_SAVE_DIR, "save_file.npz")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        all_Xs, all_Ys, test_size=0.25)
    # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test, y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    rd_rxn_test, rd_rxn_train, rd_rxn_all, rd_rxn_big = \
        (None, None, None, None)
    if args.reactivity_dir is not None:
        # add training data
        X_rxn_train, Y_rxn_train, X_rxn_test, Y_rxn_test, X_rxn_big, Y_rxn_big = \
            load_reactivity_data(args.reactivity_dir, args.reactivity_test_percent)
        X_train.extend(X_rxn_train)
        y_train.extend(Y_rxn_train)

        print("Number of reactivity points in training set {0:d}".format(len(Y_rxn_train)))
        print("Number of reactivity points in test set {0:d}".format(len(Y_rxn_test)))

        # keep reaction test set separate
        rd_rxn_test = RawDataset(X_rxn_test, Y_rxn_test) if X_rxn_test else None
        rd_rxn_train = RawDataset(X_rxn_train, Y_rxn_train) if X_rxn_train else None
        # redundant, can be eliminated
        rd_rxn_all = RawDataset(X_rxn_test + X_rxn_train, Y_rxn_test + Y_rxn_train)
        # cannot currently handle this in test either
        # everything over 32 atoms
        rd_rxn_big = RawDataset(X_rxn_big, Y_rxn_big)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    with tf.Session(config=config) as sess:
        # This training code implements cross-validation based training, whereby we determine convergence on a given
        # epoch depending on the cross-validation error for a given validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If after more than
        # max_local_epoch_count number of epochs have been run and no progress has been made, we decrease the learning
        # rate and restore the best found parameters.

        n_gpus = int(args.gpus)
        if n_gpus > 0:
            towers = ["/gpu:" + str(i) for i in range(n_gpus)]
        else:
            towers = ["/cpu:" + str(i) for i in range(multiprocessing.cpu_count())]

        layers = (128, 128, 64, 1)
        if args.deep_network:
            layers = (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)

        print("Soft placing operations onto towers:", towers)

        activation_fn = activations.get_fn_by_name(args.activation_function)
        precision = PRECISION[args.precision]

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=precision,
            layer_sizes=layers,
            activation_fn=activation_fn,
            fit_charges=args.fit_charges,
        )

        if args.convert_checkpoint:
            print("Converting saved network to numpy")
            save_dir = os.path.join(args.save_dir, "save")
            trainer.load(save_dir)
            trainer.save_numpy(save_file)
            print("Complete, exiting")
            return

        if os.path.exists(save_file):
            print("Restoring existing model from", save_file)
            trainer.load_numpy(save_file)
        else:
            if not os.path.exists(ANI_SAVE_DIR):
                print("Save directory", ANI_SAVE_DIR, "does not exist... creating")
                os.makedirs(ANI_SAVE_DIR)
            trainer.initialize()  # initialize to random variables

        max_local_epoch_count = 10

        train_ops = [
            trainer.global_epoch_count,
            trainer.learning_rate,
            trainer.local_epoch_count,
            trainer.unordered_l2s,
            trainer.train_op,
        ]

        best_test_score = trainer.eval_abs_rmse(rd_test)

        # Uncomment if you'd like to inspect the gradients
        # all_grads = []
        # for grad in trainer.coordinate_gradients(rd_test):
        #     all_grads.append(grad)
        # assert len(all_grads) == rd_test.num_mols()

        print("------------Starting Training--------------")

        start_time = time.time()

        while sess.run(trainer.learning_rate) > 5e-10:  # this is to deal with a numerical error, we technically train to 1e-9

            while sess.run(trainer.local_epoch_count) < max_local_epoch_count:

                # sess.run(trainer.max_norm_ops)  # should this run after every batch instead?
                start_time = time.time()

                train_results = list(trainer.feed_dataset(
                    rd_train,
                    shuffle=True,
                    target_ops=train_ops,
                    batch_size=batch_size,
                    before_hooks=trainer.max_norm_ops))

                global_epoch = train_results[0][0]
                time_per_epoch = time.time() - start_time
                train_abs_rmse = np.sqrt(np.mean(flatten_results(train_results, pos=3))) * HARTREE_TO_KCAL_PER_MOL
                learning_rate = train_results[0][1]
                local_epoch_count = train_results[0][2]

                test_abs_rmse = trainer.eval_abs_rmse(rd_test)
                print(time.strftime("%Y-%m-%d %H:%M:%S"), 'tpe:', "{0:.2f}s,".format(time_per_epoch),
                      'g-epoch', global_epoch, 'l-epoch', local_epoch_count,
                      'lr', "{0:.0e}".format(learning_rate),
                      'train/test abs rmse:', "{0:.2f} kcal/mol,".format(train_abs_rmse),
                      "{0:.2f} kcal/mol".format(test_abs_rmse), end='')

                if test_abs_rmse < best_test_score:
                    gdb11_abs_rmse = trainer.eval_abs_rmse(rd_gdb11)
                    print(' | gdb11 abs rmse', "{0:.2f} kcal/mol | ".format(gdb11_abs_rmse), end='')

                    best_test_score = test_abs_rmse
                    sess.run([trainer.incr_global_epoch_count,
                              trainer.reset_local_epoch_count])

                    # info about reactivity training
                    rxn_pairs = [
                        (rd_rxn_train, "train"),
                        (rd_rxn_test, "test"),
                        (rd_rxn_all, "all"),
                        (rd_rxn_big, "big")
                    ]
                    for rd, name in rxn_pairs:
                        if rd is not None:
                            rxn_abs_rmse = trainer.eval_abs_rmse(rd)
                            print(
                                ' | reactivity abs rmse ({0:s})'.format(name),
                                "{0:.2f} kcal/mol | ".format(rxn_abs_rmse),
                                end=''
                            )
                            # should really be a weighted ave
                            if name == "test":
                                best_test_score += rxn_abs_rmse
                else:
                    sess.run([trainer.incr_global_epoch_count,
                              trainer.incr_local_epoch_count])

                trainer.save_numpy(save_file)
                print('', end='\n')

            print("==========Decreasing learning rate==========")
            sess.run(trainer.decr_learning_rate)
            sess.run(trainer.reset_local_epoch_count)
            # trainer.load_best_params()

    return
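# Hedged sketch (not part of the original source): a PRECISION mapping consistent with
# the --precision choices used in the argument parser above ("single" is the default).
# Whether the repo's actual dict has exactly these keys is an assumption.
PRECISION_SKETCH = {
    "single": tf.float32,
    "double": tf.float64,
}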
def main():

    parser = argparse.ArgumentParser(
        description="Run ANI1 neural net training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--ani-lib', required=True,
                        help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted', default=False, action='store_true',
                        help="Whether to use fitted or self-ixn")
    parser.add_argument('--add-ffdata', default=False, action='store_true',
                        help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")
    parser.add_argument('--save-dir', default='~/work',
                        help="Location where save data is dumped. If the folder does not exist then it will be created.")
    parser.add_argument('--train-dir', default='~/ANI-1_release',
                        help="Location where training data is located")

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    print("Available activation functions:", activations.get_all_fn_names())

    ANI_TRAIN_DIR = args.train_dir
    ANI_SAVE_DIR = args.save_dir

    # save_dir = os.path.join(ANI_SAVE_DIR, "save")
    save_file = os.path.join(ANI_SAVE_DIR, "save_file.npz")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        all_Xs, all_Ys, test_size=0.25)
    # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test, y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    with tf.Session(config=config) as sess:
        # This training code implements cross-validation based training, whereby we determine convergence on a given
        # epoch depending on the cross-validation error for a given validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If after more than
        # max_local_epoch_count number of epochs have been run and no progress has been made, we decrease the learning
        # rate and restore the best found parameters.

        n_gpus = int(args.gpus)
        if n_gpus > 0:
            towers = ["/gpu:" + str(i) for i in range(n_gpus)]
        else:
            towers = ["/cpu:" + str(i) for i in range(multiprocessing.cpu_count())]

        print("Soft placing operations onto towers:", towers)

        # activation_fn = activations.get_fn_by_name("celu")  # if you want to use the command line.
        activation_fn = activations.celu  # preferred
        # activation_fn = tf.nn.selu
        # activation_fn = functools.partial(tf.nn.leaky_relu, alpha=0.2)
        # activation_fn = activations.get_fn_by_name("normal", 0.5, 0.2)

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=tf.float32,
            layer_sizes=(128, 128, 64, 1),
            activation_fn=activation_fn,
            fit_charges=False,
        )

        if os.path.exists(save_file):
            print("Restoring existing model from", save_file)
            trainer.load_numpy(save_file)
        else:
            if not os.path.exists(ANI_SAVE_DIR):
                print("Save directory", ANI_SAVE_DIR, "does not exist... creating")
                os.makedirs(ANI_SAVE_DIR)
            trainer.initialize()  # initialize to random variables

        max_local_epoch_count = 10

        train_ops = [
            trainer.global_epoch_count,
            trainer.learning_rate,
            trainer.local_epoch_count,
            trainer.unordered_l2s,
            trainer.train_op,
        ]

        best_test_score = trainer.eval_abs_rmse(rd_test)

        # Uncomment if you'd like to inspect the gradients
        # all_grads = []
        # for grad in trainer.coordinate_gradients(rd_test):
        #     all_grads.append(grad)
        # assert len(all_grads) == rd_test.num_mols()

        print("------------Starting Training--------------")

        start_time = time.time()

        while sess.run(trainer.learning_rate) > 5e-10:  # this is to deal with a numerical error, we technically train to 1e-9

            while sess.run(trainer.local_epoch_count) < max_local_epoch_count:

                # sess.run(trainer.max_norm_ops)  # should this run after every batch instead?
                start_time = time.time()

                train_results = list(trainer.feed_dataset(
                    rd_train,
                    shuffle=True,
                    target_ops=train_ops,
                    batch_size=batch_size,
                    before_hooks=trainer.max_norm_ops))

                global_epoch = train_results[0][0]
                time_per_epoch = time.time() - start_time
                train_abs_rmse = np.sqrt(np.mean(flatten_results(train_results, pos=3))) * HARTREE_TO_KCAL_PER_MOL
                learning_rate = train_results[0][1]
                local_epoch_count = train_results[0][2]

                test_abs_rmse = trainer.eval_abs_rmse(rd_test)
                print(time.strftime("%Y-%m-%d %H:%M:%S"), 'tpe:', "{0:.2f}s,".format(time_per_epoch),
                      'g-epoch', global_epoch, 'l-epoch', local_epoch_count,
                      'lr', "{0:.0e}".format(learning_rate),
                      'train/test abs rmse:', "{0:.2f} kcal/mol,".format(train_abs_rmse),
                      "{0:.2f} kcal/mol".format(test_abs_rmse), end='')

                if test_abs_rmse < best_test_score:
                    gdb11_abs_rmse = trainer.eval_abs_rmse(rd_gdb11)
                    print(' | gdb11 abs rmse', "{0:.2f} kcal/mol | ".format(gdb11_abs_rmse), end='')

                    best_test_score = test_abs_rmse
                    sess.run([trainer.incr_global_epoch_count,
                              trainer.reset_local_epoch_count])
                else:
                    sess.run([trainer.incr_global_epoch_count,
                              trainer.incr_local_epoch_count])

                trainer.save_numpy(save_file)
                print('', end='\n')

            print("==========Decreasing learning rate==========")
            sess.run(trainer.decr_learning_rate)
            sess.run(trainer.reset_local_epoch_count)
            # trainer.load_best_params()

    return