Example #1
# imports assumed by this snippet (standard numpy / pymatgen / MEGNet API)
import numpy as np
from pymatgen.io.ase import AseAtomsAdaptor

from megnet.data.crystal import CrystalGraph
from megnet.data.graph import GaussianDistance
from megnet.models import MEGNetModel


def prepare_model_megnet(individuals, epochs, outfile, excl=None):
    """Prepare and save a MEGNet model trained on a list of individuals.

    Uses the total energy per atom as the target. excl is a list of
    chemical symbols: structures with exactly this stoichiometry are
    excluded, which is important for network learning.
    """
    structures = []
    energies = []
    adapt = AseAtomsAdaptor()

    for ind in individuals:
        struct_ase = ind.get_init_structure()
        chem_sym = struct_ase.get_chemical_symbols()
        # skip the excluded stoichiometry, if one was given
        if excl and chem_sym == excl:
            continue
        structures.append(adapt.get_structure(struct_ase))
        energies.append(ind.e_tot)

    print("read data of " + str(len(structures)) + " structures total")

    # standard values as taken from the MEGNet manual
    nfeat_bond = 100
    nfeat_global = 2
    r_cutoff = 5
    gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
    gaussian_width = 0.5
    distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
    graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

    # model training
    model.train(structures, energies, epochs=epochs)

    model.save_model(outfile)
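
A minimal usage sketch, assuming individuals is a list of objects exposing get_init_structure() (returning an ASE Atoms) and an e_tot attribute, as the function requires; file names, the epoch count, and the excluded stoichiometry are illustrative:

prepare_model_megnet(individuals, epochs=200, outfile="energy_model.hdf5")
# exclude structures whose per-atom symbol list is exactly ['Ga', 'As'] (hypothetical)
prepare_model_megnet(individuals, 200, "energy_model_excl.hdf5", excl=["Ga", "As"])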
Example #2
# (the snippet is cut off above; from the argument alignment the call is
# presumably model.train_from_graphs with the training graphs first)
model.train_from_graphs(train_graphs,
                        train_targets,
                        val_graphs,
                        val_targets,
                        epochs=EPOCHS,
                        verbose=2,
                        initial_epoch=0,
                        callbacks=callbacks)

#  6. Model testing

##  load the best model with lowest validation error
files = glob("./callback/*.hdf5")
best_model = sorted(files, key=os.path.getctime)[-1]

model.load_weights(best_model)
model.save_model("best_model.hdf5")


def evaluate(test_graphs, test_targets):
    """
    Evaluate the test errors using test_graphs and test_targets

    Args:
        test_graphs (list): list of graphs
        test_targets (list): list of target properties

    Returns:
        mean absolute errors
    """
    test_data = model.graph_converter.get_flat_data(test_graphs, test_targets)
    gen = GraphBatchDistanceConvert(
Example #3
                targets[it][i] = prdc
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i])/2
        logging.info('Data count: {dc}, std orig dft value: {std_orig}, std of model output: {std_model}'.format(
            dc=len(targets_lst), std_orig=np.std(targets_lst), std_model=np.std(prediction_lst)))
        logging.info('Data count: {dc}, Mean orig: {mean_orig}, Mean_model: {mean_model}'.format(
            dc=len(targets_lst), mean_orig=np.mean(targets_lst), mean_model=np.mean(prediction_lst)))
        # store the per-iteration error list for later analysis
        with open(dump_model_name + '_' + it + '.txt', 'wb') as f:
            pickle.dump(error_lst, f)

# model = MEGNetModel(10, 2, nblocks=3, lr=1e-3,
#         n1=4, n2=4, n3=4, npass=1, ntarget=1,
#         graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))

model = MEGNetModel(
    nfeat_edge=10,
    nfeat_global=2,
    graph_converter=CrystalGraph(
        bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
model.save_model(dump_model_name + '_1by1_init_randomly.hdf5')
init_model_tag = 'EGPHS'

ep = 5000
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

db_short_full_dict = {'G': 'gllb-sc', 'H': 'hse', 'S': 'scan', 'P': 'pbe', 'E': 'E1'}

def construct_dataset_from_str(db_short_str):
    s = []
    t = []
    for i in range(len(db_short_str)):
        s.extend(structures[db_short_full_dict[db_short_str[i]]])
        # the snippet is cut off here; by symmetry with the structures, the
        # matching targets are presumably gathered the same way
        t.extend(targets[db_short_full_dict[db_short_str[i]]])
    return s, t
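
With the db_short_full_dict mapping above, each character of the tag string selects one database; a minimal sketch using the init_model_tag defined earlier:

s, t = construct_dataset_from_str(init_model_tag)  # 'EGPHS' -> E1, gllb-sc, pbe, hse, scan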
Example #4
Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']

Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

nfeat_bond = 10
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, r_cutoff, nfeat_bond)
gaussian_width = 0.5
distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
graph_converter = CrystalGraph(bond_converter=distance_converter,
                               cutoff=r_cutoff)
model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

# from_file is a classmethod that returns a new model, so its result must be
# assigned; warm-start from a previously fitted model when one exists
try:
    model = MEGNetModel.from_file('fitted_gap_model.hdf5')
except OSError:
    pass

model.train(Xtrain,
            ytrain,
            epochs=epochs,
            batch_size=batch_size,
            validation_structures=Xtest,
            validation_targets=ytest,
            scrub_failed_structures=True)

model.save_model('fitted_gap_model.hdf5')
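
Once retrained and saved, the model can be applied to held-out structures with MEGNet's standard single-structure call; a minimal sketch (the variable name is illustrative):

predicted_gap = model.predict_structure(Xtest.iloc[0])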
Example #5
                epochs=ep,
                save_checkpoint=False,
                automatic_correction=False)
        idx += data_size[i]
        prediction(model)
elif training_mode == 6: # PBE -> HSE ... -> part EXP, one by one, with 20% validation
    idx = 0
    for i in range(len(data_size)):
        model.train(structures[idx:idx+int(0.8*data_size[i])], targets[idx:idx+int(0.8*data_size[i])],
                validation_structures=structures[idx+int(0.8*data_size[i]):(idx+data_size[i])],
                validation_targets=targets[idx+int(0.8*data_size[i]):(idx+data_size[i])],
                callbacks=[callback, XiaotongCB((test_input, test_targets), commit_id)],
                epochs=ep,
                save_checkpoint=False,
                automatic_correction=False)
        model.save_model(commit_id+'_'+str(training_mode)+'_'+str(i)+'.hdf5')
        idx += data_size[i]
        prediction(model)
elif training_mode == 7:  # all training sets together, with 20% validation
    n = len(structures)
    # shuffle structures and targets in unison before splitting
    c = list(zip(structures, targets))
    random.shuffle(c)
    structures, targets = zip(*c)
    model.train(structures[:int(0.8 * n)], targets[:int(0.8 * n)],
            validation_structures=structures[int(0.8 * n):],
            validation_targets=targets[int(0.8 * n):],
            callbacks=[callback, XiaotongCB((test_input, test_targets), commit_id)],
            epochs=ep * len(data_size),
            save_checkpoint=False,
            automatic_correction=False)
    prediction(model)
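
The branches above all repeat the same 80/20 slicing arithmetic; a hypothetical helper (not part of the original script) makes the pattern explicit:

def split_80_20(structs, targs, start, size):
    # return (train_structs, train_targs, val_structs, val_targs) for one dataset slice
    cut = start + int(0.8 * size)
    end = start + size
    return structs[start:cut], targs[start:cut], structs[cut:end], targs[cut:end]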
Example #6
def main() -> None:
    """Execute main script."""
    parser = ArgumentParser()
    parser.add_argument(
        "--train",
        action="store_true",
        help="Whether to train the model.",
        dest="do_train",
    )
    parser.add_argument(
        "--eval",
        action="store_true",
        help="Whether to evaluate the model.",
        dest="do_eval",
    )
    parser.add_argument(
        "--which",
        choices=["MEGNet", "VGP", "ProbNN"],
        required=("--train" in sys.argv),
        help=(
            "Which components to train: "
            "MEGNet -- Just the MEGNetModel; "
            "VGP -- Just the VGP part of the ProbNN; "
            "ProbNN -- The whole ProbNN."
        ),
        dest="which",
    )
    parser.add_argument(
        "--epochs",
        "-n",
        type=int,
        required=("--train" in sys.argv),
        help="Number of training epochs.",
        dest="epochs",
    )
    parser.add_argument(
        "--inducing",
        "-i",
        type=int,
        help="Number of inducing index points.",
        default=500,
        dest="num_inducing",
    )
    args = parser.parse_args()

    do_train: bool = args.do_train
    do_eval: bool = args.do_eval
    which_model: str = args.which
    epochs: int = args.epochs
    num_inducing: int = args.num_inducing

    # Load the MEGNetModel into memory
    try:
        meg_model: MEGNetModel = MEGNetModel.from_file(str(MEGNET_MODEL_DIR))
    except FileNotFoundError:
        meg_model = MEGNetModel(**default_megnet_config())

    # Load the data into memory
    df = download_data(PHONONS_URL, PHONONS_SAVE_DIR)
    structures = df["structure"]
    targets = df["last phdos peak"]
    num_data = len(structures)
    print(f"{num_data} datapoints loaded.")

    num_training = floor(num_data * TRAINING_RATIO)
    print(f"{num_training} training data, {num_data-num_training} test data.")
    train_structs = structures[:num_training]
    train_targets = targets[:num_training]
    test_structs = structures[num_training:]
    test_targets = targets[num_training:]

    if which_model == "MEGNet":
        if do_train:
            tf_callback = TensorBoard(MEGNET_LOGS / NOW, write_graph=False)
            meg_model.train(
                train_structs,
                train_targets,
                test_structs,
                test_targets,
                automatic_correction=False,
                dirname="meg_checkpoints",
                epochs=epochs,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            meg_model.save_model(str(MEGNET_MODEL_DIR))
        if do_eval:
            train_predicted = meg_model.predict_structures(train_structs).flatten()
            train_mae = MAE(train_predicted, None, train_targets)
            metric_logger.info("MEGNet train MAE = %f", train_mae)

            test_predicted = meg_model.predict_structures(test_structs).flatten()
            test_mae = MAE(test_predicted, None, test_targets)
            metric_logger.info("MEGNet test MAE = %f", test_mae)
    else:
        # Load the ProbNN into memory
        try:
            prob_model: MEGNetProbModel = MEGNetProbModel.load(PROB_MODEL_DIR)
        except FileNotFoundError:
            prob_model = MEGNetProbModel(meg_model, num_inducing, metrics=["MAE"])

        if do_train:
            if which_model == "VGP":
                prob_model.set_frozen("NN", recompile=False)
                prob_model.set_frozen(["VGP", "Norm"], freeze=False)
                tf_callback = TensorBoard(VGP_LOGS / NOW, write_graph=False)
            else:
                prob_model.set_frozen(["VGP", "NN", "Norm"], freeze=False)
                tf_callback = TensorBoard(FULL_MODEL_LOGS / NOW, write_graph=False)
            prob_model.train(
                train_structs,
                train_targets,
                epochs,
                test_structs,
                test_targets,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            prob_model.save(PROB_MODEL_DIR)
        if do_eval:
            train_metrics = evaluate_uq_metrics(
                prob_model, train_structs, train_targets
            )
            log_metrics(train_metrics, "training")
            test_metrics = evaluate_uq_metrics(prob_model, test_structs, test_targets)
            log_metrics(test_metrics, "test")
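
The script is driven entirely by the flags defined in main(); assuming it is saved as, say, train_prob_model.py (a hypothetical name), a typical sequence would be to train and evaluate the base network first with "python train_prob_model.py --train --eval --which MEGNet --epochs 100", then warm up the uncertainty head with --which VGP, and finally fine-tune the whole ProbNN with --which ProbNN.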
Example #7
    for i in range(len(data_size)):
        model.train(
            structures[idx:idx + int(0.8 * data_size[i])],
            targets[idx:idx + int(0.8 * data_size[i])],
            validation_structures=structures[
                idx + int(0.8 * data_size[i]):idx + data_size[i]],
            validation_targets=targets[
                idx + int(0.8 * data_size[i]):idx + data_size[i]],
            # callbacks=[callback, XiaotongCB((test_input, test_targets), commit_id)],
            callbacks=[callback],
            epochs=ep,
            save_checkpoint=False,
            batch_size=512,
            automatic_correction=False)
        model.save_model(dump_model_name + '_' + str(i) + '.hdf5')
        idx += data_size[i]
        prediction(model)
elif training_mode == 11:  # PBE -> HSE ... -> part EXP, one by one, with 20% of the last dataset as validation
    idx = 0
    for i in range(len(data_size)):
        model.train(
            structures[idx:idx + int(0.8 * data_size[i])],
            targets[idx:idx + int(0.8 * data_size[i])],
            validation_structures=structures[
                sum(data_size[:-1]) + int(0.8 * data_size[-1]):],
            validation_targets=targets[
                sum(data_size[:-1]) + int(0.8 * data_size[-1]):],
            # validation_structures=structures[idx+int(0.8*data_size[i]):(idx+data_size[i])],
            # validation_targets=targets[idx+int(0.8*data_size[i]):(idx+data_size[i])],
            # callbacks=[callback, XiaotongCB((test_input, test_targets), commit_id)],