示例#1
0
def load_model(model_name: str) -> GraphModel:
    """
    Load a pre-trained model by its user-friendly name.

    The model file is looked up under ``MODEL_PATH`` first and under
    ``LOCAL_MODEL_PATH`` second. When neither location holds the file,
    ``_download_models()`` is called once and both locations are checked
    again.

    Args:
        model_name: str model name string, must be in ``AVAILABLE_MODELS``

    Returns: GraphModel

    Raises:
        ValueError: if ``model_name`` is not in ``AVAILABLE_MODELS``
        FileNotFoundError: if the model file is still missing after the
            download attempt

    """
    if model_name not in AVAILABLE_MODELS:
        raise ValueError('model name %s not in available model list %s' %
                         (model_name, AVAILABLE_MODELS))

    relative_path = MODEL_MAPPING[model_name]
    mvl_path = os.path.join(MODEL_PATH, relative_path)
    local_mvl_path = os.path.join(LOCAL_MODEL_PATH, relative_path)

    def _load_existing():
        # Return the model from the first location that has the file,
        # or None when neither does.
        if os.path.isfile(mvl_path):
            return MEGNetModel.from_file(mvl_path)

        logger.info(
            "Package-level mvl_models not included, trying temperary mvl_models downloads.."
        )
        if os.path.isfile(local_mvl_path):
            logger.info("Model found in local mvl_models path")
            return MEGNetModel.from_file(local_mvl_path)
        return None

    model = _load_existing()
    if model is None:
        # Retry exactly once after downloading. The previous implementation
        # called itself here, which recursed without bound whenever the
        # download did not produce the expected file.
        _download_models()
        model = _load_existing()
    if model is None:
        raise FileNotFoundError(
            'model file for %s not found at %s or %s after download' %
            (model_name, mvl_path, local_mvl_path))
    return model
示例#2
0
 def test_check_dimension(self):
     """
     check_dimension must raise for a graph that does not fit the model.

     The graph is produced with a 20-center Gaussian expansion while the
     model is constructed with 10 as its first positional argument.
     """
     gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 20), 0.5))
     s = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
     graph = gc.convert(s)
     model = MEGNetModel(10, 2, nblocks=1, lr=1e-2,
                         n1=4, n2=4, n3=4, npass=1, ntarget=1,
                         graph_converter=CrystalGraph(bond_converter=gc),
                         )
     with self.assertRaises(Exception) as context:
         model.check_dimension(graph)
     # The message check has to live outside the ``with`` body: a statement
     # placed after the raising call inside the body is never executed.
     self.assertIn('The data dimension for bond', str(context.exception))
示例#3
0
 def __init__(self, target_name):
     """
     Load the model and the target scaler for one target.

     Args:
         target_name: str, used as the model file stem under
             ``QM9_MODELDIR`` and as the key into ``SCALER``
     """
     model_file = pjoin(QM9_MODELDIR, target_name + ".hdf5")
     self.model = MEGNetModel.from_file(model_file)
     # The atom convertor of the loaded model is replaced after loading.
     self.model.graph_convertor.atom_convertor = AtomNumberToTypeConvertor()

     params = SCALER[target_name]
     self.scaler = Scaler(params['mean'], params['std'],
                          params['is_per_atom'])
示例#4
0
 def test_from_mvl_models(self):
     """The "Eform_MP_2019" model must reproduce the reference Li2O value to 4 places."""
     reference = -2.0152957439422607
     with ScratchDir("."):
         model = MEGNetModel.from_mvl_models("Eform_MP_2019")
         li2o = self.get_structure("Li2O")
         predicted = float(model.predict_structure(li2o))
         self.assertAlmostEqual(predicted, reference, places=4)
示例#5
0
 def test_from_url(self):
     """A model loaded from the GitHub URL must reproduce the reference Li2O value."""
     url = "https://github.com/materialsvirtuallab/megnet/raw/master/mvl_models/mp-2019.4.1/formation_energy.hdf5"
     reference = -2.0152957439422607
     with ScratchDir("."):
         model = MEGNetModel.from_url(url)
         li2o = self.get_structure("Li2O")
         predicted = float(model.predict_structure(li2o))
         self.assertAlmostEqual(predicted, reference)
示例#6
0
    def setUpClass(cls):
        """
        Create the shared fixtures: feature sizes, two constant data
        sequences (crystal-style and molecule-style inputs) and two small
        MEGNet models that differ only in ``ntarget``.
        """
        cls.n_feature = 3
        cls.n_bond_features = 10
        cls.n_global_features = 2

        class Generator(Sequence):
            """Sequence of length 10 that returns the same (x, y) pair for any index."""

            def __init__(self, x, y):
                self.x, self.y = x, y

            def __len__(self):
                return 10

            def __getitem__(self, index):
                return self.x, self.y

        def index_arrays():
            # Fresh arrays on every call so the two input lists share nothing.
            return [
                np.array([[0, 0, 1, 1, 2, 3]]),
                np.array([[1, 1, 0, 0, 3, 2]]),
                np.array([[0, 0, 1, 1]]),
                np.array([[0, 0, 0, 0, 1, 1]]),
            ]

        bond_shape = (1, 6, cls.n_bond_features)
        global_shape = (1, 2, cls.n_global_features)
        target_shape = (1, 2, 1)

        # Random draws happen in the same sequence as before:
        # crystal bonds, crystal globals, crystal target, then the molecule set.
        crystal_atoms = np.array([1, 2, 3, 4]).reshape((1, -1))
        crystal_bonds = np.random.normal(size=bond_shape)
        crystal_globals = np.random.normal(size=global_shape)
        x_crystal = [crystal_atoms, crystal_bonds, crystal_globals] + index_arrays()
        y_crystal = np.random.normal(size=target_shape)
        cls.train_gen_crystal = Generator(x_crystal, y_crystal)

        mol_atoms = np.random.normal(size=(1, 4, cls.n_feature))
        mol_bonds = np.random.normal(size=bond_shape)
        mol_globals = np.random.normal(size=global_shape)
        x_mol = [mol_atoms, mol_bonds, mol_globals] + index_arrays()
        y_mol = np.random.normal(size=target_shape)
        cls.train_gen_mol = Generator(x_mol, y_mol)

        def build_model(ntarget):
            # Each model gets its own graph converter instance.
            converter = CrystalGraph(
                bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5))
            return MEGNetModel(10, 2, nblocks=1, lr=1e-2,
                               n1=4, n2=4, n3=4, npass=1, ntarget=ntarget,
                               graph_converter=converter)

        cls.model = build_model(1)
        cls.model2 = build_model(2)
示例#7
0
 def test_crystal_model_v2(self):
     """Train for two epochs on a one-atom Si cell and check the prediction shape."""
     converter = CrystalGraph()
     silicon = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
     settings = dict(nfeat_edge=None, nfeat_global=2,
                     nblocks=1, lr=1e-2,
                     n1=4, n2=4, n3=4,
                     npass=1, ntarget=1,
                     graph_converter=converter,
                     centers=np.linspace(0, 4, 10), width=0.5)
     with ScratchDir('.'):
         model = MEGNetModel(**settings)
         # train() result is kept as the model used for prediction
         model = model.train([silicon, silicon], [0.1, 0.1], epochs=2)
         prediction = model.predict_structure(silicon)
         self.assertTrue(prediction.shape == (1, ))
示例#8
0
 def setUpClass(cls):
     """Create a Na/Cl Fm-3m structure and a small MEGNet model shared by the tests."""
     lattice = Lattice.cubic(5.69169)
     species = ['Na', 'Cl']
     coords = [[0, 0, 0], [0, 0, 0.5]]
     cls.s = Structure.from_spacegroup('Fm-3m', lattice, species, coords)

     cls.dummy_model = MEGNetModel(100, 2,
                                   nblocks=1, npass=1,
                                   n1=4, n2=2, n3=2)
示例#9
0
    def __init__(self,
                 model_name: Union[str, GraphModel,
                                   MEGNetModel] = DEFAULT_MODEL,
                 use_cache: bool = True):
        """
        Wrap a model so that the outputs of its 'meg*', 'set*' and
        'concatenate*' layers are exposed as the model outputs.

        Args:
            model_name (str or GraphModel): a str is passed to
                MEGNetModel.from_file; a GraphModel instance is used as is
                (its ``model`` attribute is replaced).
            use_cache (bool): stored on the instance as ``use_cache``

        Raises:
            ValueError: if model_name is neither a str nor a GraphModel
        """
        if isinstance(model_name, str):
            model = MEGNetModel.from_file(model_name)
        elif isinstance(model_name, GraphModel):
            model = model_name
        else:
            raise ValueError('model_name only support str '
                             'or GraphModel object')

        prefixes = ('meg', 'set', 'concatenate')
        picked = [layer for layer in model.layers
                  if layer.name.startswith(prefixes)]
        all_names = [layer.name for layer in picked]

        # A layer whose name starts with 'megnet' marks the model as 'v2'.
        has_megnet_layer = any(name.startswith('megnet') for name in all_names)
        self.version = 'v2' if has_megnet_layer else 'v1'

        valid_outputs = [layer.output for layer in picked]

        outputs = []
        valid_names = []
        for name, output in zip(all_names, valid_outputs):
            if not isinstance(output, list):
                valid_names.append(name)
                outputs.append(output)
                continue
            # Layers with several outputs get one '<name>_<k>' entry each.
            for position, tensor in enumerate(output):
                valid_names.append('%s_%d' % (name, position))
                outputs.append(tensor)

        model.model = Model(inputs=model.inputs, outputs=outputs)
        self.model = model
        self.valid_names = valid_names
        self._cache: Dict[str, float] = {}
        self.use_cache = use_cache
示例#10
0
def random_structure_on_substrate(symbols,
                                  amin,
                                  amax,
                                  dmin,
                                  model_file,
                                  Natt=RANDOM_ATTEMPTS):
    """
    Return the random structure on a substrate with the lowest predicted e_tot.

    The substrate is read from "POSCAR.substrate". For each of ``Natt``
    attempts a (0, 0, 1) surface is built, a random structure is placed on it
    shifted by SURF_DIST along z, and the combined system is scored with the
    MEGNet model loaded from ``model_file``. The best system is also written
    to 'best.in' in espresso-in format.

    Args:
        symbols: passed through to random_structure
        amin: overwritten with the surface cell's [0][0] entry on every attempt
        amax: overwritten with the surface cell's [0][0] entry on every attempt
        dmin: passed through to random_structure
        model_file: path given to MEGNetModel.from_file
        Natt: number of attempts

    Returns:
        ase Atoms of the best substrate + structure system.

    Raises:
        RuntimeError: if no attempt produced a predicted energy below the
            initial threshold of 1000 (e.g. ``Natt`` is 0)
    """
    substrate = read_vasp("POSCAR.substrate")
    adapt = AseAtomsAdaptor()
    model = MEGNetModel.from_file(model_file)
    e_tot_min = 1000.
    struct_out = None

    for _ in range(Natt):
        s = surface(substrate, (0, 0, 1), 1, vacuum=0., tol=1e-10)
        cell = s.get_cell()
        cell[2][2] = CELL_Z
        s.set_cell(cell)
        # The lateral size is taken from the surface cell; the caller's
        # amin/amax values are not used.
        amin = cell[0][0]
        amax = cell[0][0]
        struct = random_structure(symbols, amin, amax, dmin, iwrite=0)

        # Lift every atom of the random structure above the surface and add it.
        for symbol, position in zip(struct.get_chemical_symbols(),
                                    struct.get_positions()):
            position[2] = position[2] + SURF_DIST
            at = Atom(symbol)
            at.position = position
            s.append(at)

        struct_pymatgen = adapt.get_structure(s)
        try:
            e_tot = model.predict_structure(struct_pymatgen)
        except Exception:
            # Best-effort scoring: a failed prediction counts as 0 instead of
            # aborting the search. KeyboardInterrupt/SystemExit now propagate.
            e_tot = 0.
            print("isolated molecule exception handled")
        if e_tot < e_tot_min:
            struct_out = s
            e_tot_min = e_tot

    if struct_out is None:
        raise RuntimeError(
            "no structure generated: no attempt scored below the initial "
            "energy threshold")

    print("e_tot min: ", e_tot_min)
    write(filename='best.in', images=struct_out, format="espresso-in")

    del model

    return struct_out
示例#11
0
def random_structure_group(symbols,
                           composition,
                           thickness,
                           tol_factor,
                           model_file,
                           dmin=2.0,
                           Natt=RANDOM_ATTEMPTS):
    """
    Return the generated 2D structure with the lowest predicted e_tot.

    For each of ``Natt`` attempts a group id in 1..80 is drawn at random and
    ``random_crystal_2D`` is asked for a crystal. Valid crystals are scored
    with the MEGNet model loaded from ``model_file``; a candidate is accepted
    when its energy is below the best so far (starting at 0) and
    ``check_dist`` on its 2x2x1 supercell returns 1.

    Args:
        symbols: passed through to random_crystal_2D
        composition: passed through to random_crystal_2D
        thickness: passed through to random_crystal_2D
        tol_factor: factor for the "atomic" Tol_matrix
        model_file: path given to MEGNetModel.from_file
        dmin: distance argument for check_dist
        Natt: number of attempts

    Returns:
        ase Atoms of the best accepted structure.

    Raises:
        RuntimeError: if no candidate was accepted
    """
    tol_m_1 = Tol_matrix(prototype="atomic", factor=tol_factor)
    adapt = AseAtomsAdaptor()
    model = MEGNetModel.from_file(model_file)
    e_tot_min = 0.
    struct_out = None

    for _ in range(Natt):
        group_id = randrange(80) + 1
        my_crystal = random_crystal_2D(group_id,
                                       symbols,
                                       composition,
                                       1.0,
                                       thickness=thickness,
                                       tm=tol_m_1)
        if not my_crystal.valid:
            continue

        struct = crystal_to_atoms(my_crystal)
        Nat = len(struct.get_chemical_symbols())
        struct_pymatgen = adapt.get_structure(struct)
        try:
            e_tot = model.predict_structure(struct_pymatgen)
        except Exception:
            # Best-effort scoring: a failed prediction counts as 0 instead of
            # aborting the search. KeyboardInterrupt/SystemExit now propagate.
            e_tot = 0.
            print("isolated molecule exception handled")

        # Distances are checked on the 2x2x1 supercell, hence Nat * 2 * 2.
        struct2x2x1 = struct * (2, 2, 1)
        positions = struct2x2x1.get_positions()
        flag = check_dist(Nat * 2 * 2, positions, dmin)

        if (e_tot < e_tot_min) and flag == 1:
            struct_out = struct
            e_tot_min = e_tot

    print("e_tot/atom: " + str(e_tot_min))

    del model

    if struct_out is None:
        # Previously this surfaced as an UnboundLocalError at the return.
        raise RuntimeError(
            "no structure generated: no valid candidate with negative "
            "predicted energy passed the distance check")

    return struct_out
示例#12
0
文件: models.py 项目: yarenty/megnet
def load_model(model_name):
    """
    Load a model by its user-friendly name.

    Args:
        model_name: str model name string, must be in AVAILABLE_MODELS

    Returns: GraphModel read from the file that MODEL_MAPPING gives for the name

    Raises:
        ValueError: if model_name is not in AVAILABLE_MODELS

    """
    if model_name not in AVAILABLE_MODELS:
        raise ValueError('model name %s not in available model list %s' % (model_name, AVAILABLE_MODELS))

    model_file = MODEL_MAPPING[model_name]
    return MEGNetModel.from_file(model_file)
示例#13
0
def show_layers(model_file):
    """
    show_layers(model_file)

    Loads a pre-trained MEGNet model and prints the value returned by its
    summary() method.

    Inputs:
        model_file-      A pre-trained MEGNet model file.

    Outputs:
        None. Output goes to stdout.
    """
    model = MEGNetModel.from_file(model_file)
    summary = model.summary()
    print(summary)
示例#14
0
    def setUpClass(cls):
        """
        Create the shared fixtures: feature sizes, two endless data generators
        (crystal-style and molecule-style inputs) and one small MEGNet model.
        """
        cls.n_feature = 3
        cls.n_bond_features = 10
        cls.n_global_features = 2

        def generator(inputs, targets):
            # Yields the same (inputs, targets) pair forever.
            while True:
                yield inputs, targets

        def index_arrays():
            # Fresh arrays on every call so the two input lists share nothing.
            return [
                np.array([[0, 0, 1, 1, 2, 3]]),
                np.array([[1, 1, 0, 0, 3, 2]]),
                np.array([[0, 0, 1, 1]]),
                np.array([[0, 0, 0, 0, 1, 1]]),
            ]

        bond_shape = (1, 6, cls.n_bond_features)
        global_shape = (1, 2, cls.n_global_features)
        target_shape = (1, 2, 1)

        # Random draws happen in the same sequence as before:
        # crystal bonds, crystal globals, crystal target, then the molecule set.
        crystal_atoms = np.array([1, 2, 3, 4]).reshape((1, -1))
        crystal_bonds = np.random.normal(size=bond_shape)
        crystal_globals = np.random.normal(size=global_shape)
        x_crystal = [crystal_atoms, crystal_bonds, crystal_globals] + index_arrays()
        y_crystal = np.random.normal(size=target_shape)
        cls.train_gen_crystal = generator(x_crystal, y_crystal)

        mol_atoms = np.random.normal(size=(1, 4, cls.n_feature))
        mol_bonds = np.random.normal(size=bond_shape)
        mol_globals = np.random.normal(size=global_shape)
        x_mol = [mol_atoms, mol_bonds, mol_globals] + index_arrays()
        y_mol = np.random.normal(size=target_shape)
        cls.train_gen_mol = generator(x_mol, y_mol)

        distance = GaussianDistance(np.linspace(0, 5, 10), 0.5)
        cls.model = MEGNetModel(
            10, 2,
            nblocks=1, npass=1, ntarget=1,
            n1=4, n2=4, n3=4,
            lr=1e-2,
            graph_convertor=CrystalGraph(bond_convertor=distance),
        )
示例#15
0
def find_sub_tree(cur_tag, input_history_tag):
    """
    Recursively train one model per remaining dataset tag.

    For every entry ``db_str`` of ``cur_tag`` the parent model (named after
    ``input_history_tag``) is loaded, trained on the dataset for ``db_str``
    with an 80/20 train/validation split, saved under the extended history
    tag, and the function recurses with ``db_str`` removed from ``cur_tag``.

    Args:
        cur_tag: str of dataset tags still to be visited
        input_history_tag: str of tags already applied, joined by '_'
    """
    father_model_name = dump_model_name + '_' + input_history_tag + '.hdf5'

    for db_str in cur_tag:
        history_tag = input_history_tag + '_' + db_str
        # When a special path is set, only tags on that path are processed.
        if special_path != '' and history_tag not in special_path:
            continue

        cur_model_name = dump_model_name + '_' + history_tag + '.hdf5'
        # Reloaded on every iteration so each branch starts from the parent.
        cur_model = MEGNetModel.from_file(father_model_name)

        s, t = construct_dataset_from_str(db_str)
        n_samples = len(s)
        split = int(0.8 * n_samples)

        try:
            cur_model.train(s[:split], t[:split],
                            validation_structures=s[split:],
                            validation_targets=t[split:],
                            callbacks=[callback],
                            save_checkpoint=False,
                            automatic_correction=False,
                            batch_size=256,
                            epochs=ep)
        except TypeError:
            mae = 'nan'
        else:
            mae = prediction(cur_model)
        logging.info('MAE of {tag} is: {mae}'.format(tag=history_tag, mae=mae))

        cur_model.save_model(cur_model_name)
        del s, t, n_samples
        gc.collect()

        # Descend only while there are other tags left besides this one.
        if len(cur_tag) > 1:
            next_tag = cur_tag.replace(db_str, '')
            find_sub_tree(next_tag, history_tag)
示例#16
0
    def __init__(self,
                 model_name: Union[str, GraphModel,
                                   MEGNetModel] = DEFAULT_MODEL,
                 use_cache: bool = True):
        """
        Wrap a model so that the outputs of its 'meg*', 'set*' and
        'concatenate*' layers are exposed as the model outputs.

        Args:
            model_name (str or GraphModel): a str is passed to
                MEGNetModel.from_file; a GraphModel instance is used as is
                (its ``model`` attribute is replaced).
            use_cache (bool): stored on the instance as ``use_cache``

        Raises:
            ValueError: if model_name is neither a str nor a GraphModel
        """
        if isinstance(model_name, str):
            model = MEGNetModel.from_file(model_name)
        elif isinstance(model_name, GraphModel):
            model = model_name
        else:
            raise ValueError('model_name only support str '
                             'or GraphModel object')

        prefixes = ('meg', 'set', 'concatenate')
        picked = [layer for layer in model.layers
                  if layer.name.startswith(prefixes)]
        all_names = [layer.name for layer in picked]
        valid_outputs = [layer.output for layer in picked]

        outputs = []
        valid_names = []
        for name, output in zip(all_names, valid_outputs):
            if not isinstance(output, list):
                valid_names.append(name)
                outputs.append(output)
                continue
            # Layers with several outputs get one '<name>_<k>' entry each.
            for position, tensor in enumerate(output):
                valid_names.append('%s_%d' % (name, position))
                outputs.append(tensor)

        model.model = Model(inputs=model.inputs, outputs=outputs)
        self.model = model
        self.valid_names = valid_names
        self._cache = {}
        self.use_cache = use_cache
示例#17
0
def random_structure_model(stoichio,
                           amin,
                           amax,
                           dmin,
                           model_file,
                           Natt=RANDOM_ATTEMPTS):
    """
    Return the random structure with the lowest predicted e_tot.

    ``Natt`` random structures are generated and scored with the MEGNet model
    loaded from ``model_file``. A structure is kept when its energy is below
    the best so far (starting at 0). The winner is written to
    'POSCAR_<random 5 chars>.in' in espresso-in format.

    Args:
        stoichio: passed to random_structure, and to Atoms for the fallback
        amin: passed through to random_structure
        amax: passed through to random_structure
        dmin: passed through to random_structure
        model_file: path given to MEGNetModel.from_file
        Natt: number of attempts

    Returns:
        ase Atoms of the best structure, or ``Atoms(stoichio)`` when no
        attempt scored below 0 (a warning is printed in that case).
    """
    adapt = AseAtomsAdaptor()
    model = MEGNetModel.from_file(model_file)
    e_tot_min = 0.
    struct_out = None

    for _ in range(Natt):
        struct = random_structure(stoichio, amin, amax, dmin)
        struct_pymatgen = adapt.get_structure(struct)
        try:
            e_tot = model.predict_structure(struct_pymatgen)
        except Exception:
            # Best-effort scoring: a failed prediction counts as 0 instead of
            # aborting the search. KeyboardInterrupt/SystemExit now propagate.
            e_tot = 0.
            print("isolated molecule exception handled")
        if e_tot < e_tot_min:
            struct_out = struct
            e_tot_min = e_tot

    if struct_out is None:
        print("Warning: structure not generated!")
        struct_out = Atoms(stoichio)
    else:
        print("e_tot/atom: " + str(e_tot_min))
        write(filename='POSCAR_' + random_str(5) + '.in',
              images=struct_out,
              format="espresso-in")

    del model

    return struct_out
示例#18
0
    def __init__(self, model_name=DEFAULT_MODEL, use_cache=True):
        """
        Load a model file and expose the outputs of its 'meg*', 'set*' and
        'concatenate*' layers as the model outputs.

        Args:
            model_name: path passed to MEGNetModel.from_file
            use_cache: stored on the instance as ``use_cache``
        """
        model = MEGNetModel.from_file(model_name)

        prefixes = ('meg', 'set', 'concatenate')
        picked = [layer for layer in model.layers
                  if layer.name.startswith(prefixes)]
        all_names = [layer.name for layer in picked]
        valid_outputs = [layer.output for layer in picked]

        outputs = []
        valid_names = []
        for name, output in zip(all_names, valid_outputs):
            if not isinstance(output, list):
                valid_names.append(name)
                outputs.append(output)
                continue
            # Layers with several outputs get one '<name>_<k>' entry each.
            for position, tensor in enumerate(output):
                valid_names.append('%s_%d' % (name, position))
                outputs.append(tensor)

        model.model = Model(inputs=model.inputs, outputs=outputs)
        self.model = model
        self.valid_names = valid_names
        self._cache = {}
        self.use_cache = use_cache
示例#19
0
    def load(cls: "MEGNetProbModel",
             save_path: PathLike,
             load_ckpt: bool = True) -> "MEGNetProbModel":
        """Load a MEGNetProbModel from disk.

        The MEGNetModel component is read first and then handed to the parent
        class's ``load`` as ``meg_model``.

        Args:
            save_path: The path to the model's save directory.
            load_ckpt: Whether to load the best checkpoint's weights, instead
                of those saved at the time of the last :meth:`save`.

        Returns:
            The loaded model.

        Raises:
            FileNotFoundError: If the ``save_path`` or any components do not exist.

        """
        meg_path = _get_save_paths(save_path)["meg_path"]
        try:
            meg_model = MEGNetModel.from_file(str(meg_path))
        except OSError as err:
            # Any OSError from reading the file is reported as a missing
            # model; the original error is chained so the real cause
            # (e.g. an unreadable file) stays visible in the traceback.
            raise FileNotFoundError(
                f"No saved MEGNetModel at `{meg_path}`.") from err
        return super().load(save_path, load_ckpt, meg_model=meg_model)
示例#20
0
def prepare_model_megnet(individuals, epochs, outfile, excl=None):
    """
    Train a MEGNet model on a list of individuals and save it to ``outfile``.

    Each individual contributes its initial structure
    (``get_init_structure()``) and its ``e_tot`` attribute as the target.

    Args:
        individuals: iterable of objects with ``get_init_structure()`` and
            ``e_tot``
        epochs: number of training epochs
        outfile: path passed to ``model.save_model``
        excl: optional list of chemical symbols; individuals whose
            ``get_chemical_symbols()`` equals this list are left out.
            None or an empty list excludes nothing.
    """
    adapt = AseAtomsAdaptor()
    structures = []
    energies = []

    for ind in individuals:
        struct_ase = ind.get_init_structure()
        # Skip the excluded stoichiometry before doing any conversion work.
        if excl and struct_ase.get_chemical_symbols() == excl:
            continue
        structures.append(adapt.get_structure(struct_ase))
        energies.append(ind.e_tot)

    print("read data of " + str(len(structures)) + " structures total")

    # Model hyper-parameters (comment in the original: taken from the Megnet manual).
    nfeat_bond = 100
    nfeat_global = 2
    r_cutoff = 5
    gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
    gaussian_width = 0.5
    distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
    graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

    model.train(structures, energies, epochs=epochs)

    model.save_model(outfile)
示例#21
0
from megnet.models import MEGNetModel
# Load the band-gap regression model from a local MEGNet checkout.
# NOTE(review): the absolute path is machine-specific — confirm it exists on the host running this.
model_form = MEGNetModel.from_file(
    '/home/vol00/scarf690/src/megnet/mvl_models/mp-2018.6.1/band_gap_regression.hdf5'
)
示例#22
0
## Set GPU
# Only GPU "0" is made visible to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

#  2. Model construction
##  Graph converter
# Gaussian expansion with 100 centers between 0 and 6 and width 0.5; cutoff of 5.0.
crystal_graph = CrystalGraph(bond_converter=GaussianDistance(
    centers=np.linspace(0, 6, 100), width=0.5),
                             cutoff=5.0)
## model setup
# nfeat_edge matches the 100 Gaussian centers above; ngvocal is the number of
# entries in TRAIN_FIDELITIES (defined outside this fragment).
model = MEGNetModel(
    nfeat_edge=100,
    nfeat_global=None,
    ngvocal=len(TRAIN_FIDELITIES),
    global_embedding_dim=16,
    nblocks=3,
    nvocal=95,
    npass=2,
    graph_converter=crystal_graph,
    lr=1e-3,
)

#  3. Data loading and processing
##  load data

##  Structure data for all materials project materials

# Fail early when the data file is not in the current working directory.
if not os.path.isfile("mp.2019.04.01.json"):
    raise RuntimeError(
        "Please download the data first! Use runall.sh in this directory if needed."
    )
     return result

# === megnet start === #

from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import StandardScaler

from megnet.callbacks import ReduceLRUponNan, ManualStop, XiaotongCB

import numpy as np

# Graph converter: Gaussian expansion with 100 centers between 0 and 5, width 0.5, cutoff 4.
# NOTE(review): the name `gc` would shadow the stdlib `gc` module if that is imported in this file — confirm.
gc = CrystalGraph(bond_converter=GaussianDistance(
        np.linspace(0, 5, 100), 0.5), cutoff=4)
# `examine_loss`, `structures` and `targets` are defined outside this fragment.
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-4, loss=examine_loss) # , metrics=[examine_loss])
INTENSIVE = False # U0 is an extensive quantity
# Fit the target scaler on the training data and attach it to the model.
scaler = StandardScaler.from_training_data(structures, targets, is_intensive=INTENSIVE)
model.target_scaler = scaler

# callbacks = [ReduceLRUponNan(patience=500), ManualStop(), XiaotongCB()]

# change structures to megnet predictable structures
mp_strs = []

# Convert structures to graphs, then scale every target together with the
# number of atoms of its graph (length of the graph's 'atom' entry).
train_graphs, train_targets = model.get_all_graphs_targets(structures, targets)
train_nb_atoms = [len(i['atom']) for i in train_graphs]
train_targets = [model.target_scaler.transform(i, j) for i, j in zip(train_targets, train_nb_atoms)]


for s in structures:
示例#24
0
# Rows before `boundary` are used for training, the remaining rows for validation.
# `inputs`, `boundary`, `epochs` and `batch_size` are defined outside this fragment.
Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']

Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

nfeat_bond = 10
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, 5, 10)
gaussian_width = 0.5
distance_convertor = GaussianDistance(gaussian_centers, gaussian_width)
# NOTE(review): despite its name this is a CrystalGraph, and it is not used
# below; `graph_convertor` is built without `cutoff=r_cutoff`. Kept unchanged.
bond_convertor = CrystalGraph(bond_convertor=distance_convertor,
                              cutoff=r_cutoff)
graph_convertor = CrystalGraph(
    bond_convertor=GaussianDistance(np.linspace(0, 5, 10), 0.5))
model = MEGNetModel(nfeat_bond, nfeat_global, graph_convertor=graph_convertor)

# from_file returns the loaded model; it does not modify the object it is
# called on. The result was previously discarded, so training always started
# from the freshly constructed model instead of the saved one.
model = MEGNetModel.from_file('fitted_gap_model.hdf5')

model.train(Xtrain,
            ytrain,
            epochs=epochs,
            batch_size=batch_size,
            validation_structures=Xtest,
            validation_targets=ytest,
            scrub_failed_structures=True)

model.save_model('fitted_gap_model.hdf5')
示例#25
0
    MAE = 0
    test_size = len(test_structures)
    for i in range(test_size):
        MAE += abs(model.predict_structure(test_structures[i]).ravel() - test_targets[i])
    MAE /= test_size
    print('MAE is:', MAE)

# First command-line argument, parsed as an integer.
training_mode = int(sys.argv[1])

# data preprocess part


if True:
    import pickle
    # load the past if needed
    model = MEGNetModel.from_file('6a34b94_9_2.hdf5')
    idx = 0
    # NOTE(review): `idx` is not advanced in the lines visible here, so every
    # chunk would start at index 0 — confirm it is updated further down.
    for sz in data_size[:-1]:
        ME = 0
        error_lst = []
        for i in range(idx, idx + sz):
            # Predict once and reuse the value for both the error and the
            # replacement target.
            predicted = model.predict_structure(structures[i]).ravel()
            e = predicted - targets[i]
            ME += e
            error_lst.append(e)
            # Targets that differ from the prediction by more than 0.5 are
            # replaced by the prediction.
            if abs(e) > 0.5:
                targets[i] = predicted
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i])/2
        ME /= sz
        # The context manager closes the file even when pickling fails.
        # NOTE(review): the file name is '<sz>txt' without a dot — kept as is.
        with open(str(sz) + 'txt', 'wb') as f:
            pickle.dump(error_lst, f)
示例#26
0
# Data preprocessing:
# Load binary compounds' formation energies example data,
# then split into training and validation subsets.
# The first TRAINING_RATIO share of the rows is used for training, the rest for validation.
full_df = load_data("binary_e_form")
num_training = int(TRAINING_RATIO * len(full_df.index))
train_df = full_df[:num_training]
val_df = full_df[num_training:]
# 4217 training samples, 1055 validation samples.

train_structs = train_df["structure"]
val_structs = val_df["structure"]
train_targets = train_df["formation_energy_per_atom"]
val_targets = val_df["formation_energy_per_atom"]

# 1. Load MEGNetModel
meg_model = MEGNetModel.from_mvl_models("Eform_MP_2019")

# 2. Make probabilistic model
# Specify Kullback-Leibler divergence weighting in loss function:
# computed here as the batch size divided by the number of training samples.
kl_weight = BATCH_SIZE / num_training
# Then make the model:
prob_model = MEGNetProbModel(
    meg_model=meg_model,
    num_inducing_points=NUM_INDUCING_POINTS,
    kl_weight=kl_weight,
)


def train_model():
    """Train and save the probabilistic model."""
    prob_model.train(
###### megnet example hyper-parameters
from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
import numpy as np

# Number of bond features; also used as the number of Gaussian centers below.
nfeat_bond = 100
nfeat_global = 2
r_cutoff = 5
# Centers span 0 .. r_cutoff + 1, i.e. one unit beyond the graph cutoff.
gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
gaussian_width = 0.5
distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
graph_converter = CrystalGraph(bond_converter=distance_converter,
                               cutoff=r_cutoff)
model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

#########################################


def cvt_fmt_graph(rows):
    structures = []
    props = []
    for row in rows:
        structures.append(
            pymatgen_io_ase.AseAtomsAdaptor.get_structure(row.toatoms()))
        props.append(row.data[predict_item] / 100)
        # props.append(abs(row.data[predict_item]/10))
    graphs_valid = []
    targets_valid = []
    structures_invalid = []
示例#28
0
# Optionally exchange the 'E1' dataset with the test set (structures and targets alike).
if swap_E1_test:
    structures['E1'], test_structures = test_structures, structures['E1']
    targets['E1'], test_targets = test_targets, targets['E1']

# The format string only uses {d}; the `item` keyword is passed but not rendered.
logging.info('dataset EXP, element dict: {d}'.format(item=it, d=Counter(sp_lst)))

# Log the dataset keys, then the number of structures and targets per key.
# The two lengths are concatenated as strings without a separator.
logging.info(str(structures.keys()) + str(targets.keys()))
for k in structures.keys():
    logging.info(str(len(structures[k])) + str(len(targets[k])))

# data preprocess part
if load_old_model_enable:
    import pickle
    # load the past if needed
    model = MEGNetModel.from_file(old_model_name)
    if predict_before_dataclean:
        prediction(model)
    diff_lst = []
    for i in range(len(s_exp)):
        diff_lst.append(model.predict_structure(s_exp[i]).ravel() - t_exp[i])
    logging.info('Std of the list(model output - exp data) is: {std}, \
mean is: {mean}'.format(std=np.std(diff_lst),
                mean=np.mean(diff_lst)))

    for it in items:
        error_lst = []
        prediction_lst = []
        targets_lst = []
        for i in range(len(structures[it])):
            prdc = model.predict_structure(structures[it][i]).ravel()
示例#29
0
import pandas as pd
import json
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error

# NOTE: read_pickle unpickles arbitrary objects; only use it on trusted files.
inputs = pd.read_pickle('./band_gap_data.pkl')


# First 75% of the rows are the training split, the rest the test split.
boundary = int(len(inputs)*0.75)
epochs = 5
batch_size=56

Xtrain = inputs.iloc[0:boundary]['structure'] 
ytrain = inputs.iloc[0:boundary]['band_gap'] 

Xtest = inputs.iloc[boundary:]['structure'] 
ytest = inputs.iloc[boundary:]['band_gap'] 

# Evaluate five saved models (directories 00_model .. 04_model): load the model
# file, then load the weights from the separate weights file.
for j in range(5):
    model = MEGNetModel.from_file('../entropy/0%s_model/fitted_band_gap_model.hdf5' % j)
    model.load_weights('../entropy/0%s_model/model-best-new-band_gap.h5' % j)
    preds = []
    vals = []
    # NOTE(review): the loop length comes from the last-1000 slice, but `i` then
    # indexes ytrain/Xtrain directly starting at 0, so the rows actually
    # evaluated may not be the last 1000 — confirm which rows are intended.
    # NOTE(review): Xtest/ytest are defined above but the metric is computed on
    # training rows — confirm this is intended.
    for i in tqdm(range(len(Xtrain[-1000:]))):
        # Only entries with a positive band gap are scored.
        if ytrain[i] > 0:
            bg = model.predict_structure(Xtrain[i])
            preds.append(bg)
            vals.append(ytrain[i])
    print(mean_absolute_error(preds, vals))

示例#30
0
 def setUpClass(cls):
     """Create a linear O-C-O molecule and load the formation-energy model shared by the tests."""
     species = ["C", "O", "O"]
     coords = [[0, 0, 0], [-1, 0, 0], [1, 0, 0]]
     cls.molecule = Molecule(species, coords)

     model_file = os.path.join(
         CWD, "../../../mvl_models/mp-2019.4.1/formation_energy.hdf5")
     cls.model = MEGNetModel.from_file(model_file)