示例#1
0
    def __init__(self):
        """Prepare a built trainer and its data system from ``water_se_a.json``.

        The JSON input is read from the directory containing this file, and
        the data system points at the sibling ``data`` directory.  After the
        model is built, ``_init_sess_serial()`` is called on the trainer and
        the value of its ``global_step`` is stored in ``self.cur_batch``.
        """
        here = os.path.dirname(__file__)
        with open(os.path.join(here, 'water_se_a.json')) as config_file:
            jdata = json.load(config_file)

        # quiet run options shared by the trainer and the data system
        self.run_opt = RunOptions(None)
        self.run_opt.verbose = False

        self.model = NNPTrainer(jdata, run_opt=self.run_opt)
        rcut = self.model.model.get_rcut()
        type_map = self.model.model.get_type_map()

        systems = [os.path.join(here, 'data')]
        training = jdata['training']
        set_pfx = training['set_prefix']
        seed = training['seed']

        # seed numpy before the data system is constructed
        np.random.seed(seed)
        batch_size = training['batch_size']
        test_size = training['numb_test']
        self.data = DeepmdDataSystem(
            systems, batch_size, test_size, rcut,
            set_prefix=set_pfx,
            run_opt=self.run_opt,
            type_map=type_map,
        )
        self.data.add_dict(data_requirement)

        self.model.build(self.data)
        self.model._init_sess_serial()

        self.cur_batch = self.model.sess.run(self.model.global_step)
示例#2
0
def _do_work(jdata, run_opt):
    """Build a trainer and a data system from ``jdata`` and run the training.

    Parameters
    ----------
    jdata
        Parsed input mapping.  It must contain a ``'training'`` section from
        which ``systems``, ``set_prefix``, ``batch_size``, ``numb_test`` and
        ``stop_batch`` are fetched with ``j_must_have``; ``seed`` is optional.
    run_opt
        Run options passed to the trainer and the data system.  Its
        ``message`` method is used to report the wall time of the training.
    """
    # init the model
    model = NNPTrainer(jdata, run_opt=run_opt)
    rcut = model.model.get_rcut()
    type_map = model.model.get_type_map()
    # init params and run options
    assert 'training' in jdata
    training = jdata['training']
    systems = j_must_have(training, 'systems')
    set_pfx = j_must_have(training, 'set_prefix')
    # a missing seed stays None, which np.random.seed accepts
    seed = training.get('seed')
    if seed is not None:
        # fold the seed into the 32-bit range before handing it to numpy
        seed = seed % (2**32)
    np.random.seed(seed)
    batch_size = j_must_have(training, 'batch_size')
    test_size = j_must_have(training, 'numb_test')
    stop_batch = j_must_have(training, 'stop_batch')
    # an empty type_map is passed to the data system as None
    ipt_type_map = type_map if len(type_map) > 0 else None
    data = DeepmdDataSystem(systems,
                            batch_size,
                            test_size,
                            rcut,
                            set_prefix=set_pfx,
                            run_opt=run_opt,
                            type_map=ipt_type_map)
    data.add_dict(data_requirement)
    # build the model with stats from the first system
    model.build(data)
    # train the model with the provided systems in a cyclic way
    start_time = time.time()
    model.train(data, stop_batch)
    end_time = time.time()
    run_opt.message("finished training\nwall time: %.3f s" %
                    (end_time - start_time))
 def test_ener_shift(self):
     """Compare the data system's energy shift with ``EnerFitting`` statistics.

     Both values are computed with ``rcond=1`` from the same two systems and
     compared element by element.
     """
     np.random.seed(0)
     systems = DeepmdDataSystem(['system_0', 'system_1'], 5, 10, 1.0)
     systems.add('energy', 1, must=True)
     shift_from_data = systems.compute_energy_shift(rcond=1)
     stats = make_all_stat(systems, 4, merge_sys=False)
     shift_from_fitting = EnerFitting._compute_output_stats(stats, rcond=1)
     # walk the data-system result and look up the matching fitting entry
     for idx, expected in enumerate(shift_from_data):
         self.assertAlmostEqual(expected, shift_from_fitting[idx])
示例#4
0
 def test_ntypes(self):
     """Check the type, batch and system counts reported by the data system."""
     n_batch, n_test = 3, 2
     system = DeepmdDataSystem(self.sys_name, n_batch, n_test, 2.0)
     # register one mandatory and one optional atomic item
     for key, required in (('test', True), ('null', False)):
         system.add(key, self.test_ndof, atomic=True, must=required)
     self.assertEqual(system.get_ntypes(), 3)
     self.assertEqual(system.get_nbatches(), [2, 4, 3, 2])
     self.assertEqual(system.get_nsystems(), self.nsys)
     expected_batch_sizes = [n_batch] * 4
     self.assertEqual(list(system.get_batch_size()), expected_batch_sizes)
示例#5
0
class Benchmark:
    """Harness that builds a trainer once and then runs single training steps.

    Attributes are set in ``__init__``:

    * ``run_opt``   -- run options with ``verbose`` switched off
    * ``model``     -- the ``NNPTrainer`` built from ``water_se_a.json``
    * ``data``      -- the ``DeepmdDataSystem`` over the ``data`` directory
    * ``cur_batch`` -- last value fetched from the trainer's ``global_step``
    """

    def __init__(self):
        """Load ``water_se_a.json``, build the model and open the session.

        The JSON input is read from the directory containing this file, and
        the data system points at the sibling ``data`` directory.  After the
        model is built, ``_init_sess_serial()`` is called on the trainer and
        the value of its ``global_step`` is stored in ``self.cur_batch``.
        """
        here = os.path.dirname(__file__)
        with open(os.path.join(here, 'water_se_a.json')) as config_file:
            jdata = json.load(config_file)

        self.run_opt = RunOptions(None)
        self.run_opt.verbose = False

        self.model = NNPTrainer(jdata, run_opt=self.run_opt)
        rcut = self.model.model.get_rcut()
        type_map = self.model.model.get_type_map()

        systems = [os.path.join(here, 'data')]
        training = jdata['training']
        set_pfx = training['set_prefix']
        seed = training['seed']

        # seed numpy before the data system is constructed
        np.random.seed(seed)
        batch_size = training['batch_size']
        test_size = training['numb_test']
        self.data = DeepmdDataSystem(systems,
                                     batch_size,
                                     test_size,
                                     rcut,
                                     set_prefix=set_pfx,
                                     run_opt=self.run_opt,
                                     type_map=type_map)
        self.data.add_dict(data_requirement)
        self.model.build(self.data)
        self.model._init_sess_serial()

        self.cur_batch = self.model.sess.run(self.model.global_step)

    def train_step(self):
        """Draw one batch, run ``train_op`` on it and refresh ``cur_batch``.

        The feed dict maps the trainer's placeholders to the batch entries:
        entries whose key contains ``'find_'`` are fed unchanged, all other
        entries are flattened with ``np.reshape(..., [-1])``, and
        ``natoms_vec`` / ``default_mesh`` are finally fed unchanged.
        """
        batch_data = self.data.get_batch(sys_weights=self.model.sys_weights)
        place_holders = self.model.place_holders
        feed_dict_batch = {}
        for kk, value in batch_data.items():
            # 'type' is fed separately below; 'find_type' is never fed
            if kk in ('find_type', 'type'):
                continue
            if 'find_' in kk:
                feed_dict_batch[place_holders[kk]] = value
            else:
                feed_dict_batch[place_holders[kk]] = np.reshape(value, [-1])
        feed_dict_batch[place_holders['type']] = np.reshape(
            batch_data['type'], [-1])
        # these two replace the flattened entries written by the loop above
        for ii in ('natoms_vec', 'default_mesh'):
            feed_dict_batch[place_holders[ii]] = batch_data[ii]
        feed_dict_batch[place_holders['is_training']] = True

        self.model.sess.run([self.model.train_op],
                            feed_dict=feed_dict_batch,
                            options=None,
                            run_metadata=None)
        # keep the stored step counter in sync instead of discarding the fetch
        self.cur_batch = self.model.sess.run(self.model.global_step)
示例#6
0
 def test_prob_sys_size_1(self):
     """Check the probabilities from a two-block ``prob_sys_size`` string.

     Systems 1:2 must carry a total weight of 0.2 and systems 2:4 a total
     weight of 0.8, with system 0 receiving nothing.
     """
     ds = DeepmdDataSystem(self.sys_name, 1, 1, 2.0)
     prob = ds._prob_sys_size_ext("prob_sys_size; 1:2:0.4; 2:4:1.6")
     self.assertAlmostEqual(np.sum(prob), 1)
     self.assertAlmostEqual(np.sum(prob[1:2]), 0.2)
     self.assertAlmostEqual(np.sum(prob[2:4]), 0.8)
     # the number of training sets is self.nset - 1
     n_train_sets = self.nset - 1
     # shift is the total set-size shift summed over the training sets
     shift = np.sum(np.arange(n_train_sets))
     self.assertAlmostEqual(prob[0], 0.0)
     self.assertAlmostEqual(prob[1], 0.2)
     # inside the second block the weights must follow the frame counts
     observed_ratio = prob[3] / prob[2]
     frames_3 = float(self.nframes[3] * n_train_sets + shift)
     frames_2 = float(self.nframes[2] * n_train_sets + shift)
     self.assertAlmostEqual(observed_ratio, frames_3 / frames_2)
示例#7
0
    def _setUp(self):
        """Build the model described by ``Args().INPUT`` and freeze it to disk.

        The graph is built after the default graph has been reset, its
        variables are initialised in a fresh session, and the listed output
        nodes are converted to constants and written to ``dipole.pb`` under
        ``modifier_datapath``.
        """
        args = Args()
        run_opt = RunOptions(args, False)
        with open(args.INPUT, 'r') as input_file:
            jdata = json.load(input_file)

        # trainer first: the data system needs its cutoff radius
        trainer = NNPTrainer(jdata, run_opt=run_opt)
        rcut = trainer.model.get_rcut()

        # data system described by the 'training' section
        training = jdata['training']
        systems = j_must_have(training, 'systems')
        set_pfx = j_must_have(training, 'set_prefix')
        batch_size = j_must_have(training, 'batch_size')
        test_size = j_must_have(training, 'numb_test')
        data = DeepmdDataSystem(systems, batch_size, test_size, rcut,
                                set_prefix=set_pfx)
        data.add_dict(data_requirement)

        # start from an empty default graph before building
        tf.reset_default_graph()

        # build the model with stats from the first system
        trainer.build(data)

        # freeze the graph
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            graph_def = tf.get_default_graph().as_graph_def()
            nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graph_def, nodes.split(","))
            output_graph = os.path.join(modifier_datapath, 'dipole.pb')
            with tf.gfile.GFile(output_graph, "wb") as pb_file:
                pb_file.write(frozen_graph_def.SerializeToString())
示例#8
0
def _do_work(jdata, run_opt):
    """Build a trainer and a data system from ``jdata`` and run the training.

    Parameters
    ----------
    jdata
        Parsed input mapping.  Its ``'training'`` section supplies
        ``systems``, ``set_prefix``, ``batch_size``, ``numb_test`` and
        ``stop_batch`` (fetched with ``j_must_have``) plus the optional
        ``seed``, ``sys_probs`` and ``auto_prob_style``.  Its ``'model'``
        section may carry a ``'modifier'`` entry.
    run_opt
        Run options passed to the trainer and to ``data.print_summary``.
        Its ``message`` method is used to report the wall time.

    Raises
    ------
    RuntimeError
        If a modifier is given whose ``'type'`` is not ``'dipole_charge'``.
    """
    # init the model
    model = NNPTrainer(jdata, run_opt=run_opt)
    rcut = model.model.get_rcut()
    type_map = model.model.get_type_map()
    # init params and run options
    assert 'training' in jdata
    training = jdata['training']
    systems = j_must_have(training, 'systems')
    # a single string is expanded with expand_sys_str; isinstance also
    # accepts str subclasses, which the exact-type comparison rejected
    if isinstance(systems, str):
        systems = expand_sys_str(systems)
    set_pfx = j_must_have(training, 'set_prefix')
    # a missing seed stays None, which np.random.seed accepts
    seed = training.get('seed')
    if seed is not None:
        # fold the seed into the 32-bit range before handing it to numpy
        seed = seed % (2**32)
    np.random.seed(seed)
    batch_size = j_must_have(training, 'batch_size')
    test_size = j_must_have(training, 'numb_test')
    stop_batch = j_must_have(training, 'stop_batch')
    sys_probs = training.get('sys_probs')
    auto_prob_style = training.get('auto_prob_style', 'prob_sys_size')
    # an empty type_map is passed to the data system as None
    ipt_type_map = type_map if len(type_map) > 0 else None
    # data modifier
    modifier = None
    modi_data = jdata['model'].get("modifier", None)
    if modi_data is not None:
        if modi_data['type'] != 'dipole_charge':
            raise RuntimeError('unknown modifier type ' +
                               str(modi_data['type']))
        modifier = DipoleChargeModifier(modi_data['model_name'],
                                        modi_data['model_charge_map'],
                                        modi_data['sys_charge_map'],
                                        modi_data['ewald_h'],
                                        modi_data['ewald_beta'])
    # init data
    data = DeepmdDataSystem(systems,
                            batch_size,
                            test_size,
                            rcut,
                            set_prefix=set_pfx,
                            type_map=ipt_type_map,
                            modifier=modifier)
    data.print_summary(run_opt,
                       sys_probs=sys_probs,
                       auto_prob_style=auto_prob_style)
    data.add_dict(data_requirement)
    # build the model with stats from the first system
    model.build(data, stop_batch)
    # train the model with the provided systems in a cyclic way
    start_time = time.time()
    model.train(data)
    end_time = time.time()
    run_opt.message("finished training\nwall time: %.3f s" %
                    (end_time - start_time))
示例#9
0
 def test_get_test(self):
     """Check ``get_test`` output for systems 0 and 2.

     For each system the atom types must equal the sorted reference types,
     ``coord`` and ``test`` are checked against the ``set.002`` files through
     ``self._in_array``, and the optional ``null`` item must be all zeros.
     """
     ds = DeepmdDataSystem(self.sys_name, 3, 2, 2.0)
     ds.add('test', self.test_ndof, atomic=True, must=True)
     ds.add('null', self.test_ndof, atomic=True, must=False)
     # (system index, coordinate file, 'test' item file)
     cases = (
         (0, 'sys_0/set.002/coord.npy', 'sys_0/set.002/test.npy'),
         (2, 'sys_2/set.002/coord.npy', 'sys_2/set.002/test.npy'),
     )
     for sys_idx, coord_file, test_file in cases:
         data = ds.get_test(sys_idx=sys_idx)
         sorted_types = list(np.sort(self.atom_type[sys_idx]))
         self.assertEqual(list(data['type'][0]), sorted_types)
         self._in_array(np.load(coord_file),
                        ds.get_sys(sys_idx).idx_map, 3, data['coord'])
         self._in_array(np.load(test_file),
                        ds.get_sys(sys_idx).idx_map, self.test_ndof,
                        data['test'])
         # 'null' was added with must=False and is expected to be zeros
         null_shape = [
             self.nframes[sys_idx] + 2,
             self.natoms[sys_idx] * self.test_ndof,
         ]
         deviation = np.linalg.norm(np.zeros(null_shape) - data['null'])
         self.assertAlmostEqual(deviation, 0.0)
示例#10
0
 def test_batch_size_raise(self):
     """Constructing a data system with batch size ``'foo'`` must raise."""
     bad_batch_size = 'foo'
     n_test = 2
     with self.assertRaises(RuntimeError):
         DeepmdDataSystem(self.sys_name, bad_batch_size, n_test, 2.0)
示例#11
0
 def test_batch_size_null(self):
     """Batch size ``'auto:3'`` must give a ``batch_size`` of four ones."""
     auto_rule = 'auto:3'
     n_test = 2
     system = DeepmdDataSystem(self.sys_name, auto_rule, n_test, 2.0)
     self.assertEqual(system.batch_size, [1] * 4)
示例#12
0
    def test_merge_all_stat(self):
        """Compare the ``'energy'`` statistics from three merge paths.

        The statistics merged by ``make_all_stat(..., merge_sys=True)`` are
        compared with those merged afterwards by ``merge_sys_stat`` and with
        those from ``_make_all_stat_ref``.
        """
        def seeded_system(*add_args, **add_kwargs):
            # reseed right before each construction, then register the item
            np.random.seed(0)
            system = DeepmdDataSystem(['system_0', 'system_1'], 5, 10, 1.0)
            system.add(*add_args, **add_kwargs)
            return system

        energy_data = seeded_system('energy', 1, must=True)
        force_data = seeded_system('force', 3, atomic=True, must=True)
        force_data_ref = seeded_system('force', 3, atomic=True, must=True)

        # every statistics run starts from the same seed
        np.random.seed(0)
        per_sys_stat = make_all_stat(energy_data, 10, merge_sys=False)
        np.random.seed(0)
        merged_stat = make_all_stat(force_data, 10, merge_sys=True)
        remerged_stat = merge_sys_stat(per_sys_stat)
        np.random.seed(0)
        ref_stat = _make_all_stat_ref(force_data_ref, 10)

        # only check if the energy is concatenated correctly
        key = 'energy'
        merged = np.array(merged_stat[key])
        remerged = np.array(remerged_stat[key])
        reference = np.array(ref_stat[key])
        self._comp_data(merged, remerged)
        self._comp_data(merged, reference)