Example 1
def load_model(model_name: str) -> GraphModel:
    """
    load the model by user friendly name as in megnet.utils.models.AVAILABEL_MODELS

    Args:
        model_name: str model name string

    Returns: GraphModel

    """

    if model_name in AVAILABLE_MODELS:
        mvl_path = os.path.join(MODEL_PATH, MODEL_MAPPING[model_name])
        if os.path.isfile(mvl_path):
            return MEGNetModel.from_file(mvl_path)

        logger.info(
            "Package-level mvl_models not included, trying temporary mvl_models download..."
        )
        local_mvl_path = os.path.join(LOCAL_MODEL_PATH,
                                      MODEL_MAPPING[model_name])
        if os.path.isfile(local_mvl_path):
            logger.info("Model found in local mvl_models path")
            return MEGNetModel.from_file(local_mvl_path)
        _download_models()
        return load_model(model_name)
    else:
        raise ValueError('model name %s not in available model list %s' %
                         (model_name, AVAILABLE_MODELS))
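A brief usage sketch (the model name is illustrative; it must be a key of AVAILABLE_MODELS, e.g. "Eform_MP_2019" in recent megnet releases):

# Illustrative only: pass any name listed in megnet.utils.models.AVAILABLE_MODELS.
model = load_model("Eform_MP_2019")
# energy = model.predict_structure(structure)  # `structure` is a pymatgen Structure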
Example 2
    def __init__(self, target_name):
        self.model = MEGNetModel.from_file(
            pjoin(QM9_MODELDIR, target_name + ".hdf5"))
        self.model.graph_convertor.atom_convertor = AtomNumberToTypeConvertor()
        self.scaler = Scaler(SCALER[target_name]['mean'],
                             SCALER[target_name]['std'],
                             SCALER[target_name]['is_per_atom'])
Example 3
    def __init__(self,
                 model_name: Union[str, GraphModel,
                                   MEGNetModel] = DEFAULT_MODEL,
                 use_cache: bool = True):
        """
        Args:
            model_name (str or MEGNetModel): trained model. If it is
                str, then only models in mvl_models are used.
            use_cache (bool): whether to use cache for structure
                graph calculations
        """
        if isinstance(model_name, str):
            model = MEGNetModel.from_file(model_name)
        elif isinstance(model_name, GraphModel):
            model = model_name
        else:
            raise ValueError('model_name only supports str '
                             'or GraphModel objects')

        layers = model.layers
        important_prefix = ['meg', 'set', 'concatenate']

        all_names = [
            i.name for i in layers
            if any([i.name.startswith(j) for j in important_prefix])
        ]

        if any([i.startswith('megnet') for i in all_names]):
            self.version = 'v2'
        else:
            self.version = 'v1'

        valid_outputs = [
            i.output for i in layers
            if any([i.name.startswith(j) for j in important_prefix])
        ]

        outputs = []
        valid_names = []
        for i, j in zip(all_names, valid_outputs):
            if isinstance(j, list):
                for k, l in enumerate(j):
                    valid_names.append(i + '_%d' % k)
                    outputs.append(l)
            else:
                valid_names.append(i)
                outputs.append(j)

        full_model = Model(inputs=model.inputs, outputs=outputs)
        model.model = full_model
        self.model = model
        self.valid_names = valid_names
        self._cache: Dict[str, float] = {}
        self.use_cache = use_cache
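The core idea above, re-wrapping selected intermediate layer outputs in a new Keras Model so they can all be evaluated in one forward pass, can be sketched independently of MEGNet (the prefix list simply mirrors the example; adjust it to the layers of interest):

# Minimal sketch: expose intermediate layer outputs as additional model outputs.
from tensorflow.keras.models import Model

def intermediate_output_model(model, prefixes=('meg', 'set', 'concatenate')):
    # str.startswith accepts a tuple, so one call covers all prefixes.
    outputs = [layer.output for layer in model.layers
               if layer.name.startswith(prefixes)]
    return Model(inputs=model.inputs, outputs=outputs)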
Example 4
def random_structure_on_substrate(symbols,
                                  amin,
                                  amax,
                                  dmin,
                                  model_file,
                                  Natt=RANDOM_ATTEMPTS):
    # returns the random structure (ase Atoms) on the substrate with the lowest e_tot according to the MEGNet model
    substrate = read_vasp("POSCAR.substrate")
    adapt = AseAtomsAdaptor()
    model = MEGNetModel.from_file(model_file)
    e_tot_min = 1000.

    for i in range(Natt):
        s = surface(substrate, (0, 0, 1), 1, vacuum=0., tol=1e-10)
        cell = s.get_cell()
        cell[2][2] = CELL_Z
        s.set_cell(cell)
        amin = cell[0][0]
        amax = cell[0][0]
        struct = random_structure(symbols, amin, amax, dmin, iwrite=0)

        j = 0
        atoms = struct.get_chemical_symbols()
        positions = struct.get_positions()
        for atom in atoms:
            at = Atom(atom)
            positions[j][2] = positions[j][2] + SURF_DIST
            pos = positions[j]
            at.position = pos
            s.append(at)
            j = j + 1

        struct_pymatgen = adapt.get_structure(s)
        try:
            e_tot = model.predict_structure(struct_pymatgen)
            # print(e_tot)
        except Exception:
            e_tot = 0.
            print("isolated molecule exception handled")
        if e_tot < e_tot_min:
            struct_out = s
            e_tot_min = e_tot

    print("e_tot min: ", e_tot_min)
    write(filename='best.in', images=struct_out, format="espresso-in")

    del model

    return struct_out
Example 5
def random_structure_group(symbols,
                           composition,
                           thickness,
                           tol_factor,
                           model_file,
                           dmin=2.0,
                           Natt=RANDOM_ATTEMPTS):
    # returns the pyxtal-generated structure (ase Atoms) with the lowest e_tot according to the MEGNet model
    tol_m_1 = Tol_matrix(prototype="atomic", factor=tol_factor)
    adapt = AseAtomsAdaptor()
    model = MEGNetModel.from_file(model_file)
    e_tot_min = 0.

    for i in range(Natt):
        group_id = randrange(80) + 1
        my_crystal = random_crystal_2D(group_id,
                                       symbols,
                                       composition,
                                       1.0,
                                       thickness=thickness,
                                       tm=tol_m_1)
        flag = 0
        if my_crystal.valid:
            struct = crystal_to_atoms(my_crystal)
            Nat = len(struct.get_chemical_symbols())
            struct_pymatgen = adapt.get_structure(struct)
            try:
                e_tot = model.predict_structure(struct_pymatgen)
            except Exception:
                e_tot = 0.
                print("isolated molecule exception handled")
            struct2x2x1 = struct * (2, 2, 1)
            positions = struct2x2x1.get_positions()
            # positions = struct.get_positions()
            # print(struct)
            flag = check_dist(Nat * 2 * 2, positions, dmin)
            # print(flag)

            if (e_tot < e_tot_min) and flag == 1:
                struct_out = struct
                e_tot_min = e_tot

    print("e_tot/atom: " + str(e_tot_min))
    # write(filename="POSCAR.best.in", images=struct_out, format="espresso-in")
    # write(filename="POSCAR.best", images=struct_out, format="vasp")

    del model

    return struct_out
Example 6
def load_model(model_name):
    """
    load the model by user friendly name as in megnet.utils.models.AVAILABEL_MODELS

    Args:
        model_name: str model name string

    Returns: GraphModel

    """

    if model_name in AVAILABLE_MODELS:
        return MEGNetModel.from_file(MODEL_MAPPING[model_name])
    else:
        raise ValueError('model name %s not in available model list %s' % (model_name, AVAILABLE_MODELS))
Example 7
def show_layers(model_file):
    """
    show_layers(model_file)

    Displays information on layers of a pre-trained 
    MEGNet model. 

    Inputs:
        model_file-      A pre-trained MEGNet model file.

    Outputs:
         1-              Layers in the model file.
    """
    pretrained_model = MEGNetModel.from_file(model_file)
    pretrained_model.summary()
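A possible call, with a hypothetical file name:

# The file name is illustrative; any saved MEGNet .hdf5 model works.
show_layers("fitted_band_gap_model.hdf5")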
Example 8
def find_sub_tree(cur_tag, input_history_tag):
    ###### load model #######
    father_model_name = dump_model_name + '_' + input_history_tag + '.hdf5'
    #########################
    for db_str in cur_tag:
        history_tag = input_history_tag
        history_tag += '_'
        history_tag += db_str
        if special_path != '' and history_tag not in special_path:
            continue
        else:
            pass
        cur_model_name = dump_model_name + '_' + history_tag + '.hdf5'
        cur_model = MEGNetModel.from_file(father_model_name)
        ###### get dataset ######
        s, t = construct_dataset_from_str(db_str)
        l = len(s)
        ###### train ############
        try:
            cur_model.train(s[:int(0.8*l)], t[:int(0.8*l)],
                        validation_structures=s[int(0.8*l):],
                        validation_targets=t[int(0.8*l):],
                        callbacks=[callback],
                        save_checkpoint=False,
                        automatic_correction=False,
                        batch_size=256,
                        epochs=ep)
        except TypeError:
            logging.info('MAE of {tag} is: {mae}'.format(tag=history_tag, mae='nan'))
        else:
            mae = prediction(cur_model)
            logging.info('MAE of {tag} is: {mae}'.format(tag=history_tag, mae=mae))
        cur_model.save_model(cur_model_name)
        del s, t, l
        gc.collect()
        ###### next level #######
        if len(cur_tag) > 1:
            tmp_tag = cur_tag
            next_tag = tmp_tag.replace(db_str, '')
            find_sub_tree(next_tag, history_tag)
        else:
            pass
Example 9
    def __init__(self,
                 model_name: Union[str, GraphModel,
                                   MEGNetModel] = DEFAULT_MODEL,
                 use_cache: bool = True):
        if isinstance(model_name, str):
            model = MEGNetModel.from_file(model_name)
        elif isinstance(model_name, GraphModel):
            model = model_name
        else:
            raise ValueError('model_name only supports str '
                             'or GraphModel objects')

        layers = model.layers
        important_prefix = ['meg', 'set', 'concatenate']

        all_names = [
            i.name for i in layers
            if any([i.name.startswith(j) for j in important_prefix])
        ]

        valid_outputs = [
            i.output for i in layers
            if any([i.name.startswith(j) for j in important_prefix])
        ]

        outputs = []
        valid_names = []
        for i, j in zip(all_names, valid_outputs):
            if isinstance(j, list):
                for k, l in enumerate(j):
                    valid_names.append(i + '_%d' % k)
                    outputs.append(l)
            else:
                valid_names.append(i)
                outputs.append(j)

        full_model = Model(inputs=model.inputs, outputs=outputs)
        model.model = full_model
        self.model = model
        self.valid_names = valid_names
        self._cache = {}
        self.use_cache = use_cache
Example 10
def random_structure_model(stoichio,
                           amin,
                           amax,
                           dmin,
                           model_file,
                           Natt=RANDOM_ATTEMPTS):
    # returns the random structure (ase Atoms) with the lowest e_tot according to the MEGNet model
    adapt = AseAtomsAdaptor()
    model = MEGNetModel.from_file(model_file)
    e_tot_min = 0.
    flag = 0

    for i in range(Natt):
        struct = random_structure(stoichio, amin, amax, dmin)
        # Nat = len(struct.get_chemical_symbols())
        struct_pymatgen = adapt.get_structure(struct)
        try:
            e_tot = model.predict_structure(struct_pymatgen)
        except Exception:
            e_tot = 0.
            print("isolated molecule exception handled")
        if e_tot < e_tot_min:
            struct_out = struct
            e_tot_min = e_tot
            flag = 1
    if flag == 0:
        print("Warning: structure not generated!")
        struct_out = Atoms(stoichio)
    if flag == 1:
        print("e_tot/atom: " + str(e_tot_min))
        write(filename='POSCAR_' + random_str(5) + '.in',
              images=struct_out,
              format="espresso-in")

    del model

    return struct_out
Example 11
    def load(cls: "MEGNetProbModel",
             save_path: PathLike,
             load_ckpt: bool = True) -> "MEGNetProbModel":
        """Load a MEGNetProbModel from disk.

        Args:
            save_path: The path to the model's save directory.
            load_ckpt: Whether to load the best checkpoint's weights, instead
                of those saved at the time of the last :meth:`save`.

        Returns:
            The loaded model.

        Raises:
            FileNotFoundError: If the ``save_path`` or any components do not exist.

        """
        paths = _get_save_paths(save_path)
        try:
            meg_model = MEGNetModel.from_file(str(paths["meg_path"]))
        except OSError:
            raise FileNotFoundError(
                f"No saved MEGNetModel at `{paths['meg_path']}`.")
        return super().load(save_path, load_ckpt, meg_model=meg_model)
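A possible call site, assuming a directory previously produced by the model's save() (the path is illustrative):

# Illustrative path; loads the probabilistic model with its best checkpoint weights.
prob_model = MEGNetProbModel.load("prob_model_save_dir", load_ckpt=True)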
Example 12
    def __init__(self, model_name=DEFAULT_MODEL, use_cache=True):
        model = MEGNetModel.from_file(model_name)
        layers = model.layers
        important_prefix = ['meg', 'set', 'concatenate']
        all_names = [i.name for i in layers if any([i.name.startswith(j) for j in important_prefix])]
        valid_outputs = [i.output for i in layers if any([i.name.startswith(j) for j in important_prefix])]

        outputs = []
        valid_names = []
        for i, j in zip(all_names, valid_outputs):
            if isinstance(j, list):
                for k, l in enumerate(j):
                    valid_names.append(i + '_%d' % k)
                    outputs.append(l)
            else:
                valid_names.append(i)
                outputs.append(j)

        full_model = Model(inputs=model.inputs, outputs=outputs)
        model.model = full_model
        self.model = model
        self.valid_names = valid_names
        self._cache = {}
        self.use_cache = use_cache
Example 13
Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']

Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

nfeat_bond = 10
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, 5, 10)
gaussian_width = 0.5
distance_convertor = GaussianDistance(gaussian_centers, gaussian_width)
graph_convertor = CrystalGraph(bond_convertor=distance_convertor,
                               cutoff=r_cutoff)
model = MEGNetModel(nfeat_bond, nfeat_global, graph_convertor=graph_convertor)

# from_file is a classmethod; assign its return value so the previously fitted
# model is actually used as the starting point for further training.
model = MEGNetModel.from_file('fitted_gap_model.hdf5')

model.train(Xtrain,
            ytrain,
            epochs=epochs,
            batch_size=batch_size,
            validation_structures=Xtest,
            validation_targets=ytest,
            scrub_failed_structures=True)

model.save_model('fitted_gap_model.hdf5')
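A minimal evaluation sketch for the freshly trained model, assuming the Xtest/ytest split defined above:

# Predict each test structure individually and report the mean absolute error.
import numpy as np

preds = np.array([model.predict_structure(s).ravel()[0] for s in Xtest])
print('test MAE:', np.mean(np.abs(preds - np.array(ytest))))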
Example 14
    MAE = 0
    test_size = len(test_structures)
    for i in range(test_size):
        MAE += abs(model.predict_structure(test_structures[i]).ravel() - test_targets[i])
    MAE /= test_size
    print('MAE is:', MAE)

training_mode = int(sys.argv[1])

# data preprocess part


if True:
    import pickle
    # load the past if needed
    model = MEGNetModel.from_file('6a34b94_9_2.hdf5')
    idx = 0
    for sz in data_size[:-1]:
        ME = 0
        error_lst = []
        for i in range(idx, idx + sz):
            e = (model.predict_structure(structures[i]).ravel() - targets[i])
            ME += e
            error_lst.append(e)
            if abs(e) > 0.5:
                targets[i] = model.predict_structure(structures[i]).ravel()
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i])/2
        ME /= sz
        with open(str(sz) + '.txt', 'wb') as f:
            pickle.dump(error_lst, f)
Example 15
    def k_fold(datadir, fold, prop, layer, activations_input_full, train_idx,
               val_idx, Xpool, perp, ndims, niters):
        """
        latent.k_fold(datadir, fold, prop, layer, activations_input_full, 
                      train_idx, val_idx, Xpool, perp, ndims, niters)
        
        tSNE analysis or feature scaling of the activations of a layer of a 
        neural network for k-fold cross-validation. 

        Inputs:
        datadir-                   Directory into which results are written.
        fold-                      Number of fold to be processed.
        prop-                      Optical property of interest.  
        layer-                     Layer of a MEGNet model of interest. 
        activations_input_full-    Input to the specific layer for  extraction 
                                   of activations for the full dataset. 
        train_idx-                 Indices to extract training set from the pool.
        val_idx-                   Indices to extract validation set from the pool.
        Xpool-                     Structures in pool.     
        perp-                      Perplexity value for tSNE analysis. 
        ndims-                     Dimensions of embedded space.  
        niters-                    The maximum number of iterations for tSNE 
                                   optimisation.
        
        Outputs:
        1-                         GP latent points for the training, validation,
                                   and test sets.
        """
        if ndims > 3:
            logging.error("0 <= ndims < 4 !")
            sys.exit()
        model_pretrained = MEGNetModel.from_file("%s/fitted_%s_model.hdf5" %
                                                 (datadir, prop))

        logging.info("Extracting activations from the %s layer ..." % layer)
        net_layer = [
            i.output for i in model_pretrained.layers
            if i.name.startswith("%s" % layer)
        ]
        compute_graph = K.function([model_pretrained.input], [net_layer])
        extracted_activations_full = []
        for full in activations_input_full:
            extracted_activations_full.append(compute_graph(full))

        activations = np.array(np.squeeze(extracted_activations_full))
        if ndims in (0, 1):
            if np.ndim(activations) > 2:
                logging.error(
                    "Dimension of extracted activations > 2 so apply tSNE instead!"
                )
                sys.exit()
            if ndims == 0:
                logging.info(
                    "No pre-processing on the extracted activations ...")
                latent_full = activations
            elif ndims == 1:
                logging.info("Scaling each feature to range 0, 1 ...")
                from sklearn.preprocessing import MinMaxScaler

                latent_full = MinMaxScaler().fit(activations).transform(
                    activations)
        elif ndims > 1:
            logging.info("Dimensionality reduction using tSNE begins ...")
            print("Requested number of components = ", ndims)
            print("Using max iterations = ", niters)
            print("Processing perplexity = ", perp)
            from sklearn.manifold import TSNE

            latent_full = TSNE(n_components=ndims,
                               n_iter=niters,
                               n_jobs=-1,
                               random_state=0,
                               perplexity=perp).fit_transform(activations)

        logging.info("Writing results to file ...")
        np.save("%s/latent_full.npy" % datadir, latent_full)

        latent_pool = latent_full[:len(Xpool)]
        np.save("%s/latent_pool.npy" % datadir, latent_pool)

        latent_train = latent_pool[train_idx]
        np.save("%s/latent_train.npy" % datadir, latent_train)

        latent_val = latent_pool[val_idx]
        np.save("%s/latent_val.npy" % datadir, latent_val)

        latent_test = latent_full[len(Xpool):]
        np.save("%s/latent_test.npy" % datadir, latent_test)

        return latent_train, latent_val, latent_test
Example 16
def find_sub_tree(cur_tag, history_tag):
    global trained_last_time
    if init_model_tag == start_model_tag or not trained_last_time:
        trained_last_time = False
        ###### load model #######
        father_model_name = dump_model_name + '_' + history_tag + '.hdf5'
        history_tag += '_'
        history_tag += cur_tag
        if special_path != '' and history_tag not in special_path:
            return
        else:
            pass

        if contain_e1_in_every_node:
            history_tag += 'E1'
        cur_model_name = dump_model_name + '_' + history_tag + '.hdf5'
        cur_model = MEGNetModel.from_file(father_model_name)
        ###### get dataset ######
        s, t = construct_dataset_from_str(cur_tag)
        l = len(s)
        ###### train ############
        try:
            cur_model.train(s[:int(0.8 * l)],
                            t[:int(0.8 * l)],
                            validation_structures=s[int(0.8 * l):],
                            validation_targets=t[int(0.8 * l):],
                            callbacks=[callback],
                            save_checkpoint=False,
                            automatic_correction=False,
                            batch_size=256,
                            epochs=ep)
        except TypeError:
            logging.info('MAE of {tag} is: {mae}'.format(tag=history_tag,
                                                         mae='nan'))
        else:
            mae = prediction(cur_model, test_structures, test_targets)
            logging.info('Ordered structures MAE of {tag} is: {mae}'.format(
                tag=history_tag, mae=mae))
            mae = prediction(cur_model, s_exp_disordered, t_exp_disordered)
            logging.info('Disordered structures MAE of {tag} is: {mae}'.format(
                tag=history_tag, mae=mae))
        cur_model.save_model(cur_model_name)
        del s, t, l
        gc.collect()
    else:
        logging.info('cur_tag is {ct}, trained_last_time is {e}'.format(
            ct=str(cur_tag), e=str(trained_last_time)))
    ###### next level #######
    if len(cur_tag) > 1:
        for i in range(len(cur_tag)):
            next_tag = cur_tag[:i] + cur_tag[i + 1:]
            find_sub_tree(next_tag, history_tag)
    elif contain_e1_in_every_node and not trained_last_time:
        ####### extra E1 training ##
        s, t = construct_dataset_from_str('')
        l = len(s)
        try:
            cur_model.train(s[:int(0.8 * l)],
                            t[:int(0.8 * l)],
                            validation_structures=s[int(0.8 * l):],
                            validation_targets=t[int(0.8 * l):],
                            callbacks=[callback],
                            save_checkpoint=False,
                            automatic_correction=False,
                            batch_size=256,
                            epochs=ep)
        except TypeError:
            logging.info('MAE of {h}_E1 is: {mae}'.format(h=history_tag,
                                                          mae='nan'))
        else:
            mae = prediction(cur_model, test_structures, test_targets)
            logging.info('Ordered structures MAE of {tag} is: {mae}'.format(
                tag=history_tag, mae=mae))
            mae = prediction(cur_model, s_exp_disordered, t_exp_disordered)
            logging.info('Disordered structures MAE of {tag} is: {mae}'.format(
                tag=history_tag, mae=mae))
        cur_model.save_model(dump_model_name + '_' + history_tag + '_E1.hdf5')
    else:
        pass
Example 17
    @classmethod
    def setUpClass(cls):
        cls.molecule = Molecule(["C", "O", "O"],
                                [[0, 0, 0], [-1, 0, 0], [1, 0, 0]])
        cls.model = MEGNetModel.from_file(
            os.path.join(
                CWD, "../../../mvl_models/mp-2019.4.1/formation_energy.hdf5"))
Example 18
    def train_test_split(datadir, prop, layer, activations_input_full, Xpool,
                         ytest, perp, ndims, niters):
        """
        latent.train_test_split(datadir, prop, layer, activations_input_full, 
                                Xpool, ytest, perp, ndims, niters)

        tSNE analysis or feature scaling of the activations of a layer of a 
        neural network.

        Inputs:
        datadir-                   Directory into which results are written.
        prop-                      Optical property of interest.
        layer-                     Layer of a MEGNet model of interest. 
        activations_input_full-    Input to the specific layer for 
                                   extraction of activations for the full dataset. 
        Xpool-                     Structures in pool.
        ytest-                     Targets in the test set. 
        perp-                      Perplexity value for tSNE analysis.
        ndims-                     Dimensions of embedded space.
        niters-                    The maximum number of iterations for 
                                   tSNE optimisation.

        Outputs:
        1-                         GP latent points for the pool and test sets. 
        """
        if ndims > 3:
            logging.error("0 <= ndims < 4!")
            sys.exit()
        model_pretrained = MEGNetModel.from_file("%s/fitted_%s_model.hdf5" %
                                                 (datadir, prop))

        logging.info("Extracting activations from the %s layer ..." % layer)
        net_layer = [
            i.output for i in model_pretrained.layers
            if i.name.startswith("%s" % layer)
        ]
        compute_graph = K.function([model_pretrained.input], [net_layer])
        extracted_activations_full = []
        for full in activations_input_full:
            extracted_activations_full.append(compute_graph(full))

        activations = np.array(np.squeeze(extracted_activations_full))
        if ndims in (0, 1):
            if np.ndim(activations) > 2:
                logging.error(
                    "Dimension of extracted activations > 2 so apply tSNE instead!"
                )
                sys.exit()
            if ndims == 0:
                logging.info(
                    "No pre-processing on the extracted activations ...")
                latent_full = activations
            elif ndims == 1:
                logging.info("Scaling each feature to range 0, 1 ...")
                from sklearn.preprocessing import MinMaxScaler

                latent_full = MinMaxScaler().fit(activations).transform(
                    activations)
        elif ndims > 1:
            logging.info("Dimensionality reduction using tSNE begins ...")
            print("Requested number of components = ", ndims)
            print("Using max iterations = ", niters)
            print("Processing perplexity = ", perp)
            from sklearn.manifold import TSNE

            latent_full = TSNE(n_components=ndims,
                               n_iter=niters,
                               n_jobs=-1,
                               random_state=0,
                               perplexity=perp).fit_transform(activations)

        latent_pool = latent_full[:len(Xpool)]
        latent_test = latent_full[len(Xpool):]

        logging.info("Writing results to file ...")
        np.save("%s/latent_full.npy" % datadir, latent_full)
        np.save("%s/latent_pool.npy" % datadir, latent_pool)
        np.save("%s/latent_test.npy" % datadir, latent_test)

        if ndims == 0:
            logging.info("Saving extracted activations plot ...")
            plt.figure(figsize=[12, 6])
            plt.title("Raw activations from %s layer" % layer)
            plt.scatter(latent_test[:, 0], latent_test[:, 1], c=ytest)
            plt.savefig("%s/activations_%s.pdf" % (datadir, prop))
        elif ndims == 1:
            logging.info("Saving scaled activations plot ...")
            plt.figure(figsize=[12, 6])
            plt.title("Scaled activations from %s layer" % layer)
            plt.scatter(latent_test[:, 0], latent_test[:, 1], c=ytest)
            plt.savefig("%s/activations_%s.pdf" % (datadir, prop))
        elif ndims > 1:
            logging.info("Saving tSNE plots ...")
            if ndims == 2:
                plt.figure(figsize=[12, 6])
                plt.title(
                    "tSNE transformed activations of %s layer \nNumber of iterations = %s \nperplexity = %s"
                    % (layer, niters, perp))
                plt.scatter(latent_test[:, 0], latent_test[:, 1], c=ytest)
            elif ndims == 3:
                from mpl_toolkits.mplot3d import Axes3D
                fig = plt.figure(figsize=[14, 6])
                ax = fig.add_subplot(111, projection="3d")
                plt.title(
                    "tSNE transformed activations of %s layer \nNumber of iterations = %s \nperplexity = %s"
                    % (layer, niters, perp))
                ax.scatter(latent_test[:, 0],
                           latent_test[:, 1],
                           latent_test[:, 2],
                           c=ytest)

            plt.savefig("%s/tSNE_%s.pdf" % (datadir, prop))

        return latent_pool, latent_test
Example 19
    s, t = zip(*c)
    return s, t

       
# pbe_energy = prediction(model, structures['pbe'], targets['pbe'])
# ordered_energy = prediction(model, test_structures, test_targets)
# disordered_energy = prediction(model, s_exp_disordered, t_exp_disordered)
# 
# logging.info('Prediction before training, MAE of \
#         pbe: {pbe}; ordered: {ordered}; disordered: {disordered}.'.format(
#     pbe=pbe_energy, ordered=ordered_energy, disordered=disordered_energy))

# find_sub_tree(init_model_tag, 'init_randomly')


cur_model_0 = MEGNetModel.from_file(old_model_name_0)
cur_model_1 = MEGNetModel.from_file(old_model_name_1)

import matplotlib.pyplot as plt
plt.figure(0)
from scipy import stats
font = {'size': 16, 'family': 'Arial'}
plt.rc('font', **font)
plt.rcParams['mathtext.rm'] = 'Arial'
plt.rcParams['pdf.fonttype'] = 42

fig, ax = plt.subplots()

plot_output_exp_err(cur_model_0, test_structures, test_targets, ax)
plot_output_exp_err(cur_model_1, structures['E1'], targets['E1'], ax)
Example 20
if swap_E1_test:
    structures['E1'], test_structures = test_structures, structures['E1']
    targets['E1'], test_targets = test_targets, targets['E1']

logging.info('dataset EXP, element dict: {d}'.format(d=Counter(sp_lst)))

logging.info(str(structures.keys()) + str(targets.keys()))
for k in structures.keys():
    logging.info(str(len(structures[k])) + str(len(targets[k])))

# data preprocess part
if load_old_model_enable:
    import pickle
    # load the past if needed
    model = MEGNetModel.from_file(old_model_name)
    if predict_before_dataclean:
        prediction(model)
    diff_lst = []
    for i in range(len(s_exp)):
        diff_lst.append(model.predict_structure(s_exp[i]).ravel() - t_exp[i])
    logging.info('Std of the list (model output - exp data) is: {std}, '
                 'mean is: {mean}'.format(std=np.std(diff_lst),
                                          mean=np.mean(diff_lst)))

    for it in items:
        error_lst = []
        prediction_lst = []
        targets_lst = []
        for i in range(len(structures[it])):
            prdc = model.predict_structure(structures[it][i]).ravel()
Example 21
def main() -> None:
    """Execute main script."""
    parser = ArgumentParser()
    parser.add_argument(
        "--train",
        action="store_true",
        help="Whether to train the model.",
        dest="do_train",
    )
    parser.add_argument(
        "--eval",
        action="store_true",
        help="Whether to evaluate the model.",
        dest="do_eval",
    )
    parser.add_argument(
        "--which",
        choices=["MEGNet", "VGP", "ProbNN"],
        required=("--train" in sys.argv),
        help=(
            "Which components to train: "
            "MEGNet -- Just the MEGNetModel; "
            "VGP -- Just the VGP part of the ProbNN; "
            "ProbNN -- The whole ProbNN."
        ),
        dest="which",
    )
    parser.add_argument(
        "--epochs",
        "-n",
        type=int,
        required=("--train" in sys.argv),
        help="Number of training epochs.",
        dest="epochs",
    )
    parser.add_argument(
        "--inducing",
        "-i",
        type=int,
        help="Number of inducing index points.",
        default=500,
        dest="num_inducing",
    )
    args = parser.parse_args()

    do_train: bool = args.do_train
    do_eval: bool = args.do_eval
    which_model: str = args.which
    epochs: int = args.epochs
    num_inducing: int = args.num_inducing

    # Load the MEGNetModel into memory
    try:
        meg_model: MEGNetModel = MEGNetModel.from_file(str(MEGNET_MODEL_DIR))
    except FileNotFoundError:
        meg_model = MEGNetModel(**default_megnet_config())

    # Load the data into memory
    df = download_data(PHONONS_URL, PHONONS_SAVE_DIR)
    structures = df["structure"]
    targets = df["last phdos peak"]
    num_data = len(structures)
    print(f"{num_data} datapoints loaded.")

    num_training = floor(num_data * TRAINING_RATIO)
    print(f"{num_training} training data, {num_data-num_training} test data.")
    train_structs = structures[:num_training]
    train_targets = targets[:num_training]
    test_structs = structures[num_training:]
    test_targets = targets[num_training:]

    if which_model == "MEGNet":
        if do_train:
            tf_callback = TensorBoard(MEGNET_LOGS / NOW, write_graph=False)
            meg_model.train(
                train_structs,
                train_targets,
                test_structs,
                test_targets,
                automatic_correction=False,
                dirname="meg_checkpoints",
                epochs=epochs,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            meg_model.save_model(str(MEGNET_MODEL_DIR))
        if do_eval:
            train_predicted = meg_model.predict_structures(train_structs).flatten()
            train_mae = MAE(train_predicted, None, train_targets)
            metric_logger.info("MEGNet train MAE = %f", train_mae)

            test_predicted = meg_model.predict_structures(test_structs).flatten()
            test_mae = MAE(test_predicted, None, test_targets)
            metric_logger.info("MEGNet test MAE = %f", test_mae)
    else:
        # Load the ProbNN into memory
        try:
            prob_model: MEGNetProbModel = MEGNetProbModel.load(PROB_MODEL_DIR)
        except FileNotFoundError:
            prob_model = MEGNetProbModel(meg_model, num_inducing, metrics=["MAE"])

        if do_train:
            if which_model == "VGP":
                prob_model.set_frozen("NN", recompile=False)
                prob_model.set_frozen(["VGP", "Norm"], freeze=False)
                tf_callback = TensorBoard(VGP_LOGS / NOW, write_graph=False)
            else:
                prob_model.set_frozen(["VGP", "NN", "Norm"], freeze=False)
                tf_callback = TensorBoard(FULL_MODEL_LOGS / NOW, write_graph=False)
            prob_model.train(
                train_structs,
                train_targets,
                epochs,
                test_structs,
                test_targets,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            prob_model.save(PROB_MODEL_DIR)
        if do_eval:
            train_metrics = evaluate_uq_metrics(
                prob_model, train_structs, train_targets
            )
            log_metrics(train_metrics, "training")
            test_metrics = evaluate_uq_metrics(prob_model, test_structs, test_targets)
            log_metrics(test_metrics, "test")
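The entry point above is driven from the command line; hypothetical invocations (the script name is assumed) might look like this:

# Hypothetical script name and arguments:
#   python train_prob_model.py --train --eval --which MEGNet --epochs 100
#   python train_prob_model.py --train --eval --which ProbNN --epochs 50 --inducing 500
if __name__ == "__main__":  # typical guard, if not already present in the full script
    main()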
Example 22
import pandas as pd
import json
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error

inputs = pd.read_pickle('./band_gap_data.pkl')


boundary = int(len(inputs)*0.75)
epochs = 5
batch_size=56

Xtrain = inputs.iloc[0:boundary]['structure'] 
ytrain = inputs.iloc[0:boundary]['band_gap'] 

Xtest = inputs.iloc[boundary:]['structure'] 
ytest = inputs.iloc[boundary:]['band_gap'] 

for j in range(5):
    model = MEGNetModel.from_file('../entropy/0%s_model/fitted_band_gap_model.hdf5' % j)
    model.load_weights('../entropy/0%s_model/model-best-new-band_gap.h5' % j)
    preds = []
    vals = []
    # evaluate on the last 1000 training samples (the original slice suggests this intent)
    for i in tqdm(range(len(Xtrain) - 1000, len(Xtrain))):
        if ytrain[i] > 0:
            bg = model.predict_structure(Xtrain[i])
            preds.append(bg.ravel()[0])
            vals.append(ytrain[i])
    print(mean_absolute_error(vals, preds))

Example 23
    def generate_new_population_alg2_substrate(self, population):
        # generates a new population using algorithm no. 2 (softmutation)
        print("")
        print("generating new population with softmutation ...")
        print("")
        new_population = []
        population.sort()
        Npop = self.Npop
        adapt = AseAtomsAdaptor()
        model = MEGNetModel.from_file(self.model_file)
        for i in range(Npop // 4):  # 1/4 of population by atom switch
            indx = randrange(Npop // 4)  # +1 # using only best 25% of population
            print("-------------------------------")
            print("new ind. from no. " + str(indx) + " by atoms switch")
            individual = new_by_switch_atoms_on_substrate(population[indx])
            individual.origin = "new ind. from no. " + str(indx) + " by atoms switch"
            new_population.append(individual)
        for i in range(Npop // 4):  # 1/4 of population by a kind of softmutation
            indx = randrange(Npop // 4)  # +1 # using only best 25% of population
            print("-------------------------------")
            print("new ind. from no. " + str(indx) + " by softmutation")
            ind_in = population[indx]
            structure_ase = ind_in.get_relaxed_structure().copy()
            structure_pymatgen = adapt.get_structure(structure_ase)
            e_tot_in = model.predict_structure(structure_pymatgen)
            e_tot_min = e_tot_in
            flag = 0
            for _ in range(100):
                individual = new_by_shift_coordinate(population[indx])
                structure_ase = individual.get_init_structure().copy()
                structure_pymatgen = adapt.get_structure(structure_ase)
                try:
                    e_tot = model.predict_structure(structure_pymatgen)
                except Exception:
                    e_tot = 0.
                    print("isolated molecule exception handled")
                if e_tot < e_tot_min:
                    flag = 1
                    e_tot_min = e_tot
                    ind_out = individual
            if flag == 1:
                ind_out.origin = "new ind. from no. " + str(indx) + " by softmutation"
                new_population.append(ind_out)
                print("softmutate energy gain: " + str(e_tot_min - e_tot_in))
            else:
                new_population.append(ind_in)
        for i in range(Npop // 2 - 1):
            # 1/2 of population (minus one) by new random structures, pyxtal+model
            # indx = randrange(Npop/2)+1
            print("-------------------------------")
            print("new ind. random from scratch")
            struct = random_structure_group(self.chem_sym,
                                            self.stoichio,
                                            thickness=3.0,
                                            tol_factor=0.9,
                                            model_file=self.model_file,
                                            dmin=self.dmin,
                                            Natt=RANDOM_ATTEMPTS)
            individual = Individual(struct)
            individual.origin = "new ind. random from scratch"
            new_population.append(individual)
        best_ind = get_best_individual(population)  # keep the best individual of the current population
        best_ind.origin = "kept best"
        new_population.append(best_ind)
        return new_population
Example 24
from megnet.models import MEGNetModel
model_form = MEGNetModel.from_file(
    '/home/vol00/scarf690/src/megnet/mvl_models/mp-2018.6.1/band_gap_regression.hdf5'
)
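A quick prediction sketch with the loaded band-gap model, using a simple rock-salt NaCl structure built with pymatgen (the structure and lattice constant are illustrative):

# Illustrative prediction; recent pymatgen exposes Structure and Lattice under pymatgen.core.
from pymatgen.core import Lattice, Structure

nacl = Structure.from_spacegroup("Fm-3m", Lattice.cubic(5.69), ["Na", "Cl"],
                                 [[0, 0, 0], [0.5, 0.5, 0.5]])
print(model_form.predict_structure(nacl))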
Example 25
from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from keras.callbacks import ModelCheckpoint
import numpy as np
import pandas as pd
import json

inputs = pd.read_pickle('./band_gap_data.pkl')

boundary = int(len(inputs) * 0.75)
epochs = 5
batch_size = 56

Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']

Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

model_form = MEGNetModel.from_file('./fitted_gap_model.hdf5')

for i in range(10):
    bg = model_form.predict_structure(Xtrain[i])
    print(bg, ytrain[i])