Example #1
import io
import os
import tempfile
import zipfile

import requests

# jarvis-tools imports (module paths assumed from the jarvis-tools layout)
from jarvis.db.figshare import data
from jarvis.db.jsonutils import loadjson
from jarvis.io.vasp.inputs import Poscar
from jarvis.io.wannier.outputs import WannierHam


def get_wann_electron(jid="JVASP-816"):
    """Download the electron Wannier tight-binding Hamiltonian (WTBH), if available."""
    w = ""
    ef = ""
    fls = data("raw_files")
    for i in fls["WANN"]:
        if i["name"].split(".zip")[0] == jid:
            r = requests.get(i["download_url"])
            z = zipfile.ZipFile(io.BytesIO(r.content))
            wdat = z.read("wannier90_hr.dat").decode("utf-8")
            js_file = jid + ".json"
            js = z.read(js_file).decode("utf-8")
            # write the Hamiltonian to a temporary file so WannierHam can parse it
            fd, path = tempfile.mkstemp()
            with os.fdopen(fd, "w") as tmp:
                tmp.write(wdat)
            w = WannierHam(path)
            # the JSON sidecar carries the Fermi energy
            fd, path = tempfile.mkstemp()
            with os.fdopen(fd, "w") as tmp:
                tmp.write(js)
            d = loadjson(path)
            ef = d["info_mesh"]["efermi"]
            # POSCAR gives the crystal structure
            fd, path = tempfile.mkstemp()
            pos = z.read("POSCAR").decode("utf-8")
            with os.fdopen(fd, "w") as tmp:
                tmp.write(pos)
            atoms = Poscar.from_file(path).atoms
    return w, ef, atoms
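
A minimal usage sketch (hedged: requires jarvis-tools, network access, and the JVASP-816 archive still being hosted on Figshare):

# Hypothetical usage of the function above.
wtbh, efermi, atoms = get_wann_electron(jid="JVASP-816")
print("Fermi energy (eV):", efermi)
print(atoms)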
Example #2
def data(dataset="dft_2d"):
    """Provide main function to download datasets."""
    url, js_tag = datasets(dataset)  # resolve the download URL and the JSON file name

    path = str(os.path.join(os.path.dirname(__file__), js_tag))
    if not os.path.isfile(path):
        # download the zipped dataset once and cache it next to this module
        zfile = str(os.path.join(os.path.dirname(__file__), "tmp.zip"))
        r = requests.get(url)
        with open(zfile, "wb") as f:
            f.write(r.content)

        with zipfile.ZipFile(zfile, "r") as zipObj:
            zipObj.extractall(os.path.join(os.path.dirname(__file__)))
        os.remove(zfile)
    data = loadjson(path)
    return data
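
A usage sketch (hedged: assumes jarvis-tools is installed and the Figshare dataset is reachable; "dft_2d" is one of the documented dataset names):

# Hypothetical usage: fetch the 2D DFT dataset and inspect the first record.
d = data(dataset="dft_2d")
print(len(d), sorted(d[0].keys())[:5])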
Example #3
def get_ff_eneleast():
    """Get JARVIS-FF related data."""
    jff1 = str(os.path.join(os.path.dirname(__file__), "jff1.json"))
    if not os.path.isfile(jff1):
        # download once and cache the JSON next to this module
        r = requests.get("https://ndownloader.figshare.com/files/10307139")
        with open(jff1, "wb") as f:
            f.write(r.content)
    data_ff1 = loadjson(jff1)
    return data_ff1
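
A usage sketch (hedged: the Figshare file above must still be available):

# Hypothetical usage; prints the number of JARVIS-FF records loaded.
ff_data = get_ff_eneleast()
print(len(ff_data))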
Example #4
"""Module to generate XML file for LAMMPS calculation."""
import numpy as np
from jarvis.core.atoms import get_supercell_dims
from jarvis.db.jsonutils import loadjson
from jarvis.analysis.structure.spacegroup import Spacegroup3D
from jarvis.core.utils import stringdict_to_xml

# mapping from Materials Project IDs to JARVIS IDs (path specific to the author's machine)
mp_jv = loadjson("/rk2/knc6/DB/MP/mp_jv_id.json")


def get_jvid(mp=""):
    """Get JARVIS-ID for MPID."""
    jvid = ""
    try:
        jvid = "'" + ",".join(mp_jv[mp]) + "'"
    except Exception as exp:
        print("No JID", exp)
    return jvid


def basic_data(data={}, source="JARVIS-FF-LAMMPS"):
    """Get basic data for table."""
    info = {}
    info["id"] = data["jid"]
    info["source_folder"] = data["source_folder"]
    info["tmp_source_folder"] = "'" + data["source_folder"] + "'"
    info["tmp_id"] = "'" + data["jid"] + "'"
    ref = data["source_folder"].split("/")[-1].split("@")[1].split("_")[0]
    info["ref"] = "'" + ref + "'"
    info["jvid"] = get_jvid(ref)
Example #5
from jarvis.tasks.vasp.vasp import JobFactory
from jarvis.db.jsonutils import loadjson

# rebuild a JobFactory from a serialized job description and run
# the full OptB88vdW calculation workflow
d = loadjson("job_fact.json")
v = JobFactory.from_dict(d)
v.all_optb88vdw_calcs()
Example #6
import os

from jarvis.analysis.structure.spacegroup import Spacegroup3D
from jarvis.core.atoms import Atoms
from jarvis.db.jsonutils import loadjson


def test_spg229():
    """Check that Spacegroup3D reproduces the stored space-group numbers."""
    d = loadjson(os.path.join(os.path.dirname(__file__), "spg229.json"))
    for i in d:
        atoms = Atoms.from_dict(i["atoms"])
        spg = Spacegroup3D(atoms).space_group_number
        assert spg == i["spg_number"]
Example #7
def train_tasks(mb=None,
                config_template="config_example.json",
                file_format="poscar"):
    """Train MatBench clalssification and regression tasks."""
    for task in mb.tasks:
        task.load()
        if task.metadata.task_type == CLF_KEY:
            classification = True
        else:
            classification = False
        # Classification tasks
        if classification:
            # rocs = []
            for ii, fold in enumerate(task.folds):
                train_df = task.get_train_and_val_data(fold, as_type="df")
                test_df = task.get_test_data(fold,
                                             include_target=True,
                                             as_type="df")
                train_df["is_metal"] = train_df["is_metal"].astype(int)
                test_df["is_metal"] = test_df["is_metal"].astype(int)
                # Name of the target property
                target = [
                    col for col in train_df.columns
                    if col not in ("id", "structure", "composition")
                ][0]
                # Replace spaces and parentheses, which can cause
                # issues when creating the folder
                fold_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") + "_fold_" +
                             str(ii))
                if not os.path.exists(fold_name):
                    os.makedirs(fold_name)
                os.chdir(fold_name)
                # ALIGNN requires the id_prop.csv file
                f = open("id_prop.csv", "w")
                for jj, j in train_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                # There is no pre-defined validation split, so we use
                # a portion of the training set as the validation set and
                # keep the test set intact
                val_df = train_df[0:len(test_df)]
                for jj, j in val_df.iterrows():
                    # for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                n_train = len(train_df)
                n_val = len(val_df)
                n_test = len(test_df)
                config = loadjson(config_template)
                config["n_train"] = n_train
                config["n_val"] = n_val
                config["n_test"] = n_test
                config["keep_data_order"] = True
                config["batch_size"] = 32
                config["epochs"] = 40
                config["classification_threshold"] = 0.01
                fname = "config_fold_" + str(ii) + ".json"
                dumpjson(data=config, filename=fname)
                f.close()
                os.chdir("..")
                outdir_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") +
                               "_outdir_" + str(ii))
                cmd = ("train_folder.py --root_dir " + fold_name +
                       " --config " + fold_name + "/" + fname +
                       " --file_format=" + file_format +
                       " --keep_data_order=True" +
                       " --classification_threshold=0.01" + " --output_dir=" +
                       outdir_name)
                print(cmd)
                os.system(cmd)
                test_csv = outdir_name + "/prediction_results_test_set.csv"
                df = pd.read_csv(test_csv)
                target_vals = df.target.values
                id_vals = df.id.values

        # Regression tasks
        # TODO: shorten the script by taking out repetitive lines
        if not classification:
            maes = []
            for ii, fold in enumerate(task.folds):
                train_df = task.get_train_and_val_data(fold, as_type="df")
                test_df = task.get_test_data(fold,
                                             include_target=True,
                                             as_type="df")
                # Name of the target property
                target = [
                    col for col in train_df.columns
                    if col not in ("id", "structure", "composition")
                ][0]
                # Replace spaces and parentheses, which can cause
                # issues when creating the folder
                fold_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") + "_fold_" +
                             str(ii))
                if not os.path.exists(fold_name):
                    os.makedirs(fold_name)
                os.chdir(fold_name)
                # ALIGNN requires the id_prop.csv file
                f = open("id_prop.csv", "w")
                for jj, j in train_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                # There is no pre-defined validation split, so we use
                # a portion of the training set as the validation set and
                # keep the test set intact
                val_df = train_df[0:len(test_df)]
                for jj, j in val_df.iterrows():
                    # for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                n_train = len(train_df)
                n_val = len(val_df)
                n_test = len(test_df)
                config = loadjson(config_template)
                config["n_train"] = n_train
                config["n_val"] = n_val
                config["n_test"] = n_test
                config["keep_data_order"] = True
                config["batch_size"] = 32
                config["epochs"] = 500
                fname = "config_fold_" + str(ii) + ".json"
                dumpjson(data=config, filename=fname)
                f.close()
                os.chdir("..")
                outdir_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") +
                               "_outdir_" + str(ii))
                cmd = ("train_folder.py --root_dir " + fold_name +
                       " --config " + fold_name + "/" + fname +
                       " --file_format=" + file_format +
                       " --keep_data_order=True" + " --output_dir=" +
                       outdir_name)
                print(cmd)
                os.system(cmd)
                test_csv = outdir_name + "/prediction_results_test_set.csv"
                df = pd.read_csv(test_csv)
                target_vals = df.target.values
                # id_vals = df.id.values
                pred_vals = df.prediction.values
                mae = mean_absolute_error(target_vals, pred_vals)
                maes.append(mae)
                task.record(fold, pred_vals, params=config)
                print(
                    "Dataset_name, Fold, MAE=",
                    task.dataset_name,
                    fold,
                    mean_absolute_error(target_vals, pred_vals),
                )
            maes = np.array(maes)
            print(maes, np.mean(maes), np.std(maes))
            print()
            print()
            print()
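
A hedged driver sketch: mb is expected to be a matbench MatbenchBenchmark instance (the matbench package provides MatbenchBenchmark; autoload=False defers dataset downloads until task.load()):

# Hypothetical driver; requires the matbench package and a config_example.json.
from matbench.bench import MatbenchBenchmark

mb = MatbenchBenchmark(autoload=False)
train_tasks(mb=mb, config_template="config_example.json", file_format="poscar")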
Example #8
            pred_vals = [i == 1 for i in pred_vals]  # convert 0/1 predictions to booleans
        results[fold] = pred_vals

    if regression:
        maes = np.array(maes)
        print(key, maes, np.mean(maes), np.std(maes))
    if not regression:
        roc_aucs = np.array(roc_aucs)
        print(key, roc_aucs, np.mean(roc_aucs), np.std(roc_aucs))
    return results


if __name__ == "__main__":
    config_template = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "config_example.json"))
    config = loadjson(config_template)
    train_tasks(mb=mb, config_template=config_template, file_format="poscar")

    run_dir = "."
    # run_dir = "/wrk/knc6/matbench/benchmarks/matbench_v0.1_alignn"

    cwd = os.getcwd()

    os.chdir(run_dir)

    results = defaultdict()  # no default factory given; behaves like a plain dict
    for task in mb.tasks:
        task.load()
        task_name = task.dataset_name
        regr = True
        if "is" in task_name:
Example #9
    def runjob(self):
        """Provide main function for running a generic VASP calculation."""
        # poscar=self.poscar
        # incar=self.incar
        # kpoints=self.kpoints
        # copy_files=self.copy_files

        # cwd = str(os.getcwd())
        # fall back to the POSCAR comment when no job name was given;
        # jobname is also used later for the ELAST/LEPSILON check
        jobname = self.jobname
        if jobname == "":
            jobname = str(self.poscar.comment)
        # job_dir = str(self.jobname)
        run_file = (str(os.getcwd()) + str("/") + str(self.jobname) +
                    str(".json"))
        run_dir = str(os.getcwd()) + str("/") + str(self.jobname)
        if self.poscar.comment.startswith("Surf"):
            [a, b, c] = self.kpoints.kpts[0]
            # self.kpoints.kpts = [[a, b, 1]]
            self.kpoints = Kpoints3D(kpoints=[[a, b, 1]])
            try:
                pol = self.poscar.atoms.check_polar
                if pol:
                    COM = self.poscar.atoms.get_center_of_mass()
                    print("COM=", COM)
                    print("Found polar surface, setting dipole corrections")
                    # DIPOL takes the center of mass as "x y z"
                    self.incar.update({
                        "LDIPOL": ".TRUE.",
                        "IDIPOL": 3,
                        "ISYM": 0,
                        "DIPOL": str(COM[0]) + " " + str(COM[1]) + " "
                        + str(COM[2]),
                    })
                    print(
                        "Polar surface encountered in run_job",
                        self.poscar.comment,
                    )
            except Exception:
                pass
        wait = False
        json_file = str(self.jobname) + str(".json")
        print(
            "json should be here=",
            str(os.getcwd()) + str("/") + str(json_file),
        )
        print("json should be=", json_file, run_file, os.getcwd())
        if os.path.exists(str(os.getcwd()) + str("/") + str(json_file)):
            try:
                data_cal = loadjson(
                    str(os.getcwd()) + str("/") + str(json_file))
                tmp_outcar = (str(os.getcwd()) + str("/") +
                              str(json_file.split(".json")[0]) +
                              str("/OUTCAR"))
                print("outcar is", tmp_outcar)
                wait = Outcar(tmp_outcar).converged  # True
                print("outcar status", wait)
                if wait:
                    f_energy = data_cal[0]["final_energy"]
                    contcar = (str(os.getcwd()) + str("/") +
                               str(json_file.split(".json")[0]) +
                               str("/CONTCAR"))
                    return f_energy, contcar
            except Exception:
                pass
        attempt = 0
        while not wait:
            attempt = attempt + 1
            if attempt == self.attempts:
                wait = True
            # print("Setting up POTCAR")
            # if self.potcar is None:
            #  new_symb = list(set(self.poscar.atoms.elements))
            #  self.potcar = Potcar(elements=new_symb, pot_type=self.pot_type)
            if not os.path.exists(run_dir):
                print("Starting new job")
                os.makedirs(run_dir)
                os.chdir(run_dir)
                self.poscar.write_file("POSCAR")
            else:
                os.chdir(run_dir)
                if os.path.isfile("OUTCAR"):
                    try:
                        wait = Outcar(
                            "OUTCAR"
                        ).converged  # Vasprun("vasprun.xml").converged
                        # wait=Vasprun("vasprun.xml").converged
                    except Exception:
                        pass
                    try:
                        self.potcar.write_file("POTCAR")
                        print("FOUND OLD CONTCAR in", os.getcwd())
                        copy_cmd = str("cp CONTCAR POSCAR")
                        self.poscar.write_file("POSCAR")
                        # pos = Poscar.from_file("CONTCAR")
                        print("copy_cmd=", copy_cmd)
                        if ("ELAST" not in jobname
                                and "LEPSILON" not in jobname):
                            # Because in ELASTIC calculations
                            # structures are deformed
                            os.system(copy_cmd)
                        # time.sleep(3)
                    except Exception:
                        pass

            self.incar.write_file("INCAR")
            self.potcar.write_file("POTCAR")
            self.kpoints.write_file("KPOINTS")
            for i in self.copy_files:
                print("copying", i)
                shutil.copy2(i, "./")

            self.run()  # .wait()
            print("Queue 1")
            if os.path.isfile("OUTCAR"):
                try:
                    wait = Outcar(
                        "OUTCAR").converged  # Vasprun("vasprun.xml").converged
                except Exception:
                    pass
            print("End of the first loop", os.getcwd(), wait)

        f_energy = "na"
        # enp = "na"
        contcar = str(os.getcwd()) + str("/") + str("CONTCAR")
        final_str = Poscar.from_file(contcar).atoms
        vrun = Vasprun("vasprun.xml")
        f_energy = float(vrun.final_energy)
        # enp = float(f_energy) / float(final_str.num_atoms)
        # natoms = final_str.num_atoms
        os.chdir("../")
        if wait:
            data_cal = []
            data_cal.append({
                "jobname": self.jobname,
                "poscar": self.poscar.atoms.to_dict(),
                "incar": self.incar.to_dict(),
                "kpoints": self.kpoints.to_dict(),
                "final_energy": (f_energy),
                "contcar": final_str.to_dict(),
            })
            json_file = str(self.jobname) + ".json"
            with open(json_file, "w") as f_json:
                f_json.write(json.dumps(data_cal))
            print("Wrote json file", f_energy)
            return f_energy, contcar
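
A usage sketch (hedged: VaspJob and its constructor arguments are assumptions based on the attributes runjob references and on the jarvis-tools layout; the input files must already exist):

# Hypothetical driver for runjob; all names below are assumptions.
from jarvis.core.kpoints import Kpoints3D
from jarvis.io.vasp.inputs import Incar, Poscar, Potcar
from jarvis.tasks.vasp.vasp import VaspJob  # class name assumed

pos = Poscar.from_file("POSCAR")
inc = Incar.from_file("INCAR")
pot = Potcar(elements=list(set(pos.atoms.elements)))  # signature per the commented hint in runjob
kp = Kpoints3D(kpoints=[[6, 6, 6]])
job = VaspJob(poscar=pos, incar=inc, potcar=pot, kpoints=kp, jobname="MAIN-RELAX")
energy, contcar = job.runjob()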