def _do_work(jdata, run_opt):
    # init the model
    model = NNPTrainer(jdata, run_opt=run_opt)
    rcut = model.model.get_rcut()
    type_map = model.model.get_type_map()
    # init params and run options
    assert 'training' in jdata
    systems = j_must_have(jdata['training'], 'systems')
    set_pfx = j_must_have(jdata['training'], 'set_prefix')
    seed = jdata['training'].get('seed')
    if seed is not None:
        seed = seed % (2**32)
    np.random.seed(seed)
    batch_size = j_must_have(jdata['training'], 'batch_size')
    test_size = j_must_have(jdata['training'], 'numb_test')
    stop_batch = j_must_have(jdata['training'], 'stop_batch')
    if len(type_map) == 0:
        # empty type_map
        ipt_type_map = None
    else:
        ipt_type_map = type_map
    # init the data system
    data = DeepmdDataSystem(systems, batch_size, test_size, rcut,
                            set_prefix=set_pfx, run_opt=run_opt,
                            type_map=ipt_type_map)
    data.add_dict(data_requirement)
    # build the model with stats from the first system
    model.build(data)
    # train the model with the provided systems in a cyclic way
    start_time = time.time()
    model.train(data, stop_batch)
    end_time = time.time()
    run_opt.message("finished training\nwall time: %.3f s" % (end_time - start_time))
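# A minimal, hypothetical `jdata` covering only the 'training' keys that
# _do_work above actually reads; real input files carry many more model,
# loss, and learning-rate settings, and the paths/values here are placeholders.
example_jdata = {
    'training': {
        'systems': ['data/water_0', 'data/water_1'],  # placeholder system paths
        'set_prefix': 'set',
        'batch_size': 4,
        'numb_test': 10,
        'stop_batch': 1000,
        'seed': 1,
    }
}
# _do_work(example_jdata, run_opt)  # run_opt would be a RunOptions instance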
def test_ener_shift(self):
    np.random.seed(0)
    data = DeepmdDataSystem(['system_0', 'system_1'], 5, 10, 1.0)
    data.add('energy', 1, must=True)
    ener_shift0 = data.compute_energy_shift(rcond=1)
    all_stat = make_all_stat(data, 4, merge_sys=False)
    ener_shift1 = EnerFitting._compute_output_stats(all_stat, rcond=1)
    for ii in range(len(ener_shift0)):
        self.assertAlmostEqual(ener_shift0[ii], ener_shift1[ii])
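# A minimal sketch of the per-type energy shift the two code paths above are
# expected to agree on: fit total frame energies against per-frame atom-type
# counts with a least-squares solve. The helper name and arguments below are
# illustrative, not the deepmd API.
import numpy as np

def _energy_shift_sketch(type_count, energy, rcond=1):
    # type_count: (nframes, ntypes) number of atoms of each type per frame
    # energy:     (nframes,) total energy per frame
    shift, _, _, _ = np.linalg.lstsq(type_count, energy, rcond=rcond)
    return shift  # one additive energy constant per atom type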
def test_ntypes(self):
    batch_size = 3
    test_size = 2
    ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
    ds.add('test', self.test_ndof, atomic=True, must=True)
    ds.add('null', self.test_ndof, atomic=True, must=False)
    self.assertEqual(ds.get_ntypes(), 3)
    self.assertEqual(ds.get_nbatches(), [2, 4, 3, 2])
    self.assertEqual(ds.get_nsystems(), self.nsys)
    self.assertEqual(list(ds.get_batch_size()), [batch_size] * 4)
class Benchmark:
    def __init__(self):
        # setup
        with open(os.path.join(os.path.dirname(__file__), 'water_se_a.json')) as fp:
            jdata = json.load(fp)
        self.run_opt = RunOptions(None)
        self.run_opt.verbose = False
        self.model = NNPTrainer(jdata, run_opt=self.run_opt)
        rcut = self.model.model.get_rcut()
        type_map = self.model.model.get_type_map()
        systems = [os.path.join(os.path.dirname(__file__), 'data')]
        set_pfx = jdata['training']['set_prefix']
        seed = jdata['training']['seed']
        np.random.seed(seed)
        batch_size = jdata['training']['batch_size']
        test_size = jdata['training']['numb_test']
        self.data = DeepmdDataSystem(systems, batch_size, test_size, rcut,
                                     set_prefix=set_pfx, run_opt=self.run_opt,
                                     type_map=type_map)
        self.data.add_dict(data_requirement)
        self.model.build(self.data)
        self.model._init_sess_serial()
        self.cur_batch = self.model.sess.run(self.model.global_step)

    def train_step(self):
        batch_data = self.data.get_batch(sys_weights=self.model.sys_weights)
        feed_dict_batch = {}
        for kk in batch_data.keys():
            if kk == 'find_type' or kk == 'type':
                continue
            if 'find_' in kk:
                feed_dict_batch[self.model.place_holders[kk]] = batch_data[kk]
            else:
                feed_dict_batch[self.model.place_holders[kk]] = np.reshape(batch_data[kk], [-1])
        for ii in ['type']:
            feed_dict_batch[self.model.place_holders[ii]] = np.reshape(batch_data[ii], [-1])
        for ii in ['natoms_vec', 'default_mesh']:
            feed_dict_batch[self.model.place_holders[ii]] = batch_data[ii]
        feed_dict_batch[self.model.place_holders['is_training']] = True
        self.model.sess.run([self.model.train_op], feed_dict=feed_dict_batch,
                            options=None, run_metadata=None)
        self.model.sess.run(self.model.global_step)
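# A possible driver for the Benchmark class above; the timing loop and step
# count are assumptions, not part of the original code. It builds the trainer
# once and times repeated single training steps.
if __name__ == '__main__':
    import time
    bench = Benchmark()
    start = time.time()
    for _ in range(100):
        bench.train_step()
    print("100 training steps: %.3f s" % (time.time() - start))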
def test_prob_sys_size_1(self):
    batch_size = 1
    test_size = 1
    ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
    prob = ds._prob_sys_size_ext("prob_sys_size; 1:2:0.4; 2:4:1.6")
    self.assertAlmostEqual(np.sum(prob), 1)
    self.assertAlmostEqual(np.sum(prob[1:2]), 0.2)
    self.assertAlmostEqual(np.sum(prob[2:4]), 0.8)
    # the number of training sets is self.nset - 1;
    # shift is the total of the per-set size offsets
    shift = np.sum(np.arange(self.nset - 1))
    self.assertAlmostEqual(prob[0], 0.0)
    self.assertAlmostEqual(prob[1], 0.2)
    self.assertAlmostEqual(
        prob[3] / prob[2],
        float(self.nframes[3] * (self.nset - 1) + shift) /
        float(self.nframes[2] * (self.nset - 1) + shift))
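# A sketch of one plausible reading of the "prob_sys_size; start:end:weight; ..."
# string tested above (the helper name and details are assumptions; the real
# parsing lives in DeepmdDataSystem._prob_sys_size_ext): each start:end:weight
# block receives weight / sum(weights) of the total probability, split among the
# systems in [start, end) in proportion to their number of batches.
def _prob_sys_size_sketch(style, nbatches):
    import numpy as np
    prob = np.zeros(len(nbatches))
    blocks = []
    for token in style.split(';')[1:]:
        start, end, weight = token.strip().split(':')
        blocks.append((int(start), int(end), float(weight)))
    total_w = sum(w for _, _, w in blocks)
    for start, end, w in blocks:
        block_nb = np.array(nbatches[start:end], dtype=float)
        prob[start:end] = (w / total_w) * block_nb / block_nb.sum()
    return prob

# With weights 0.4 and 1.6 this gives 0.2 for systems [1, 2) and 0.8 for
# systems [2, 4), matching the assertions above.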
def _setUp(self):
    args = Args()
    run_opt = RunOptions(args, False)
    with open(args.INPUT, 'r') as fp:
        jdata = json.load(fp)
    # init model
    model = NNPTrainer(jdata, run_opt=run_opt)
    rcut = model.model.get_rcut()
    # init data system
    systems = j_must_have(jdata['training'], 'systems')
    set_pfx = j_must_have(jdata['training'], 'set_prefix')
    batch_size = j_must_have(jdata['training'], 'batch_size')
    test_size = j_must_have(jdata['training'], 'numb_test')
    data = DeepmdDataSystem(systems, batch_size, test_size, rcut, set_prefix=set_pfx)
    data.add_dict(data_requirement)
    # clear the default graph
    tf.reset_default_graph()
    # build the model with stats from the first system
    model.build(data)
    # freeze the graph
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        graph = tf.get_default_graph()
        input_graph_def = graph.as_graph_def()
        nodes = ("o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,"
                 "descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,"
                 "descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,"
                 "model_attr/model_type")
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, input_graph_def, nodes.split(","))
        output_graph = os.path.join(modifier_datapath, 'dipole.pb')
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
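# A small sketch of how the frozen dipole.pb written above could be loaded back
# for inspection, using standard TensorFlow 1.x APIs (tf here is the same
# tensorflow module used by the surrounding test); the helper name is ours.
def _load_frozen_graph(pb_path):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
    return graph

# graph = _load_frozen_graph(os.path.join(modifier_datapath, 'dipole.pb'))
# dipole_t = graph.get_tensor_by_name('o_dipole:0')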
def _do_work(jdata, run_opt):
    # init the model
    model = NNPTrainer(jdata, run_opt=run_opt)
    rcut = model.model.get_rcut()
    type_map = model.model.get_type_map()
    # init params and run options
    assert 'training' in jdata
    systems = j_must_have(jdata['training'], 'systems')
    if isinstance(systems, str):
        systems = expand_sys_str(systems)
    set_pfx = j_must_have(jdata['training'], 'set_prefix')
    seed = jdata['training'].get('seed')
    if seed is not None:
        seed = seed % (2**32)
    np.random.seed(seed)
    batch_size = j_must_have(jdata['training'], 'batch_size')
    test_size = j_must_have(jdata['training'], 'numb_test')
    stop_batch = j_must_have(jdata['training'], 'stop_batch')
    sys_probs = jdata['training'].get('sys_probs')
    auto_prob_style = jdata['training'].get('auto_prob_style', 'prob_sys_size')
    if len(type_map) == 0:
        # empty type_map
        ipt_type_map = None
    else:
        ipt_type_map = type_map
    # data modifier
    modifier = None
    modi_data = jdata['model'].get("modifier", None)
    if modi_data is not None:
        if modi_data['type'] == 'dipole_charge':
            modifier = DipoleChargeModifier(modi_data['model_name'],
                                            modi_data['model_charge_map'],
                                            modi_data['sys_charge_map'],
                                            modi_data['ewald_h'],
                                            modi_data['ewald_beta'])
        else:
            raise RuntimeError('unknown modifier type ' + str(modi_data['type']))
    # init data
    data = DeepmdDataSystem(systems, batch_size, test_size, rcut,
                            set_prefix=set_pfx, type_map=ipt_type_map,
                            modifier=modifier)
    data.print_summary(run_opt, sys_probs=sys_probs,
                       auto_prob_style=auto_prob_style)
    data.add_dict(data_requirement)
    # build the model with stats from the first system
    model.build(data, stop_batch)
    # train the model with the provided systems in a cyclic way
    start_time = time.time()
    model.train(data)
    end_time = time.time()
    run_opt.message("finished training\nwall time: %.3f s" % (end_time - start_time))
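# An illustrative "modifier" section of the model config that the dipole_charge
# branch above expects. The keys are the ones read by the code; the model name
# and charge-map values are placeholders, not taken from any real input file.
example_modifier = {
    "type": "dipole_charge",
    "model_name": "dipole.pb",
    "model_charge_map": [-8.0],
    "sys_charge_map": [6.0, 1.0],
    "ewald_h": 1.0,
    "ewald_beta": 0.4,
}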
def test_get_test(self):
    batch_size = 3
    test_size = 2
    ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
    ds.add('test', self.test_ndof, atomic=True, must=True)
    ds.add('null', self.test_ndof, atomic=True, must=False)

    sys_idx = 0
    data = ds.get_test(sys_idx=sys_idx)
    self.assertEqual(list(data['type'][0]),
                     list(np.sort(self.atom_type[sys_idx])))
    self._in_array(np.load('sys_0/set.002/coord.npy'),
                   ds.get_sys(sys_idx).idx_map, 3, data['coord'])
    self._in_array(np.load('sys_0/set.002/test.npy'),
                   ds.get_sys(sys_idx).idx_map, self.test_ndof, data['test'])
    self.assertAlmostEqual(
        np.linalg.norm(
            np.zeros([self.nframes[sys_idx] + 2,
                      self.natoms[sys_idx] * self.test_ndof]) - data['null']),
        0.0)

    sys_idx = 2
    data = ds.get_test(sys_idx=sys_idx)
    self.assertEqual(list(data['type'][0]),
                     list(np.sort(self.atom_type[sys_idx])))
    self._in_array(np.load('sys_2/set.002/coord.npy'),
                   ds.get_sys(sys_idx).idx_map, 3, data['coord'])
    self._in_array(np.load('sys_2/set.002/test.npy'),
                   ds.get_sys(sys_idx).idx_map, self.test_ndof, data['test'])
    self.assertAlmostEqual(
        np.linalg.norm(
            np.zeros([self.nframes[sys_idx] + 2,
                      self.natoms[sys_idx] * self.test_ndof]) - data['null']),
        0.0)
def test_batch_size_raise(self):
    batch_size = 'foo'
    test_size = 2
    with self.assertRaises(RuntimeError):
        ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
def test_batch_size_null(self):
    batch_size = 'auto:3'
    test_size = 2
    ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
    self.assertEqual(ds.batch_size, [1, 1, 1, 1])
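# A sketch of one plausible reading of the "auto:N" batch-size rule asserted
# above (the exact rule lives in DeepmdDataSystem and may differ): choose the
# smallest per-system batch size whose total atom count reaches N.
def _auto_batch_size_sketch(rule, natoms_per_system):
    target = int(rule.split(':')[1])                # e.g. "auto:3" -> 3
    return [max(1, -(-target // n)) for n in natoms_per_system]  # ceiling division

# With at least 3 atoms per frame in every system, "auto:3" yields a batch size
# of 1 per system, consistent with the [1, 1, 1, 1] assertion above.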
def test_merge_all_stat(self):
    np.random.seed(0)
    data0 = DeepmdDataSystem(['system_0', 'system_1'], 5, 10, 1.0)
    data0.add('energy', 1, must=True)
    np.random.seed(0)
    data1 = DeepmdDataSystem(['system_0', 'system_1'], 5, 10, 1.0)
    data1.add('force', 3, atomic=True, must=True)
    np.random.seed(0)
    data2 = DeepmdDataSystem(['system_0', 'system_1'], 5, 10, 1.0)
    data2.add('force', 3, atomic=True, must=True)

    np.random.seed(0)
    all_stat_0 = make_all_stat(data0, 10, merge_sys=False)
    np.random.seed(0)
    all_stat_1 = make_all_stat(data1, 10, merge_sys=True)
    all_stat_2 = merge_sys_stat(all_stat_0)
    np.random.seed(0)
    all_stat_3 = _make_all_stat_ref(data2, 10)

    # only check whether the energy is concatenated correctly
    dd = 'energy'
    d1 = np.array(all_stat_1[dd])
    d2 = np.array(all_stat_2[dd])
    d3 = np.array(all_stat_3[dd])
    self._comp_data(d1, d2)
    self._comp_data(d1, d3)