def get_wann_electron(jid="JVASP-816"):
    """Download electron Wannier TB Hamiltonian (WTBH) if available.

    Args:
        jid: JARVIS id, e.g. "JVASP-816".

    Returns:
        (WannierHam, fermi_energy, Atoms) on success; ("", "", "") when the
        id is not found in the raw-files index (previously the not-found
        path raised NameError because ``atoms`` was never bound).
    """
    def _write_temp(text):
        # Write text to a named temp file and return its path; caller removes it.
        fd, path = tempfile.mkstemp()
        with os.fdopen(fd, "w") as tmp:
            tmp.write(text)
        return path

    w = ""
    ef = ""
    atoms = ""
    fls = data("raw_files")
    for i in fls["WANN"]:
        if i["name"].split(".zip")[0] == jid:
            r = requests.get(i["download_url"])
            z = zipfile.ZipFile(io.BytesIO(r.content))
            # Wannier Hamiltonian: WannierHam reads from a file path,
            # so round-trip through a temp file, then clean it up
            # (the original leaked all three temp files).
            wpath = _write_temp(z.read("wannier90_hr.dat").decode("utf-8"))
            w = WannierHam(wpath)
            os.remove(wpath)
            # Metadata JSON carries the Fermi energy.
            jpath = _write_temp(z.read(jid + ".json").decode("utf-8"))
            d = loadjson(jpath)
            os.remove(jpath)
            ef = d["info_mesh"]["efermi"]
            # Crystal structure.
            ppath = _write_temp(z.read("POSCAR").decode("utf-8"))
            atoms = Poscar.from_file(ppath).atoms
            os.remove(ppath)
    return w, ef, atoms
def data(dataset="dft_2d"):
    """Provide main function to download datasets.

    Downloads the dataset zip next to this module on first use, extracts
    the JSON payload, and caches it so later calls skip the download.

    Args:
        dataset: dataset key understood by ``datasets()``.

    Returns:
        Parsed JSON content of the dataset file.
    """
    url, js_tag = datasets(dataset)
    module_dir = os.path.dirname(__file__)
    path = str(os.path.join(module_dir, js_tag))
    if not os.path.isfile(path):
        zfile = str(os.path.join(module_dir, "tmp.zip"))
        r = requests.get(url)
        # Context managers guarantee the handles close even on error.
        with open(zfile, "wb") as f:
            f.write(r.content)
        with zipfile.ZipFile(zfile, "r") as zipObj:
            zipObj.extractall(os.path.join(module_dir))
        os.remove(zfile)
    data = loadjson(path)
    return data
def get_ff_eneleast():
    """Get JARVIS-FF related data.

    Downloads ``jff1.json`` from figshare next to this module on first use
    and caches it for later calls.

    Returns:
        Parsed JSON content of jff1.json.
    """
    jff1 = str(os.path.join(os.path.dirname(__file__), "jff1.json"))
    if not os.path.isfile(jff1):
        r = requests.get("https://ndownloader.figshare.com/files/10307139")
        # Context manager closes the file even if the write fails.
        with open(jff1, "wb") as f:
            f.write(r.content)
    data_ff1 = loadjson(jff1)
    return data_ff1
def data(dataset="dft_2d"):
    """Provide main function to download datasets.

    Downloads the dataset zip next to this module on first use, extracts
    the JSON payload, and caches it so later calls skip the download.

    Args:
        dataset: dataset key understood by ``datasets()``.

    Returns:
        Parsed JSON content of the dataset file.
    """
    url, js_tag = datasets(dataset)
    module_dir = os.path.dirname(__file__)
    path = str(os.path.join(module_dir, js_tag))
    if not os.path.isfile(path):
        zfile = str(os.path.join(module_dir, "tmp.zip"))
        r = requests.get(url)
        # Context managers guarantee the handles close even on error.
        with open(zfile, "wb") as f:
            f.write(r.content)
        with zipfile.ZipFile(zfile, "r") as zipObj:
            zipObj.extractall(os.path.join(module_dir))
        os.remove(zfile)
    data = loadjson(path)
    return data
"""Module to generate XML file for LAMMPS calculation.""" import numpy as np from jarvis.core.atoms import get_supercell_dims from jarvis.db.jsonutils import loadjson from jarvis.analysis.structure.spacegroup import Spacegroup3D from jarvis.core.utils import stringdict_to_xml mp_jv = loadjson("/rk2/knc6/DB/MP/mp_jv_id.json") def get_jvid(mp=""): """Get JARVIS-ID for MPID.""" jvid = "" try: jvid = "'" + ",".join(mp_jv[mp]) + "'" except Exception as exp: print("No JID", exp) pass return jvid def basic_data(data={}, source="JARVIS-FF-LAMMPS"): """Get basic data for table.""" info = {} info["id"] = data["jid"] info["source_folder"] = data["source_folder"] info["tmp_source_folder"] = "'" + data["source_folder"] + "'" info["tmp_id"] = "'" + data["jid"] + "'" ref = data["source_folder"].split("/")[-1].split("@")[1].split("_")[0] info["ref"] = "'" + ref + "'" info["jvid"] = get_jvid(ref)
"""Run all OptB88vdW calculations described by a serialized JobFactory."""
from jarvis.tasks.vasp.vasp import JobFactory
from jarvis.db.jsonutils import loadjson

# Rebuild the factory from its JSON serialization and launch every calc.
factory_dict = loadjson("job_fact.json")
factory = JobFactory.from_dict(factory_dict)
factory.all_optb88vdw_calcs()
def test_spg229():
    """Verify spacegroup detection reproduces the stored spg numbers."""
    entries = loadjson(os.path.join(os.path.dirname(__file__), "spg229.json"))
    for entry in entries:
        structure = Atoms.from_dict(entry["atoms"])
        assert Spacegroup3D(structure).space_group_number == entry["spg_number"]
def train_tasks(mb=None, config_template="config_example.json", file_format="poscar"):
    """Train MatBench classification and regression tasks with ALIGNN.

    For every task in the benchmark ``mb``: write per-fold POSCAR files and
    an ``id_prop.csv`` manifest, patch the ALIGNN config template with the
    split sizes, shell out to ``train_folder.py``, and read back the test
    predictions.  Regression MAEs are recorded via ``task.record``.

    NOTE(review): this function changes the working directory with os.chdir
    and shells out via os.system, so it is not reentrant and must be run
    from the intended root directory.
    """
    for task in mb.tasks:
        task.load()
        # CLF_KEY marks MatBench classification tasks.
        if task.metadata.task_type == CLF_KEY:
            classification = True
        else:
            classification = False
        # Classification tasks
        if classification:
            # rocs = []
            for ii, fold in enumerate(task.folds):
                train_df = task.get_train_and_val_data(fold, as_type="df")
                test_df = task.get_test_data(fold, include_target=True, as_type="df")
                # ALIGNN expects numeric labels, not booleans.
                train_df["is_metal"] = train_df["is_metal"].astype(int)
                test_df["is_metal"] = test_df["is_metal"].astype(int)
                # Name of the target property (the one non-structural column).
                target = [
                    col for col in train_df.columns
                    if col not in ("id", "structure", "composition")
                ][0]
                # Making sure there are no spaces or parentheses which
                # can cause issues while creating the folder.
                fold_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") + "_fold_" + str(ii))
                if not os.path.exists(fold_name):
                    os.makedirs(fold_name)
                os.chdir(fold_name)
                # ALIGNN requires the id_prop.csv file
                f = open("id_prop.csv", "w")
                for jj, j in train_df.iterrows():
                    # NOTE(review): `id` shadows the builtin here and below.
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                # There is no pre-defined validation split, so we will use
                # a portion of training set as validation set, and
                # keep test set intact
                val_df = train_df[0:len(test_df)]
                for jj, j in val_df.iterrows():
                    # for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                # Patch the config template with this fold's split sizes.
                n_train = len(train_df)
                n_val = len(val_df)
                n_test = len(test_df)
                config = loadjson(config_template)
                config["n_train"] = n_train
                config["n_val"] = n_val
                config["n_test"] = n_test
                config["keep_data_order"] = True
                config["batch_size"] = 32
                config["epochs"] = 40
                config["classification_threshold"] = 0.01
                fname = "config_fold_" + str(ii) + ".json"
                dumpjson(data=config, filename=fname)
                f.close()
                os.chdir("..")
                outdir_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") + "_outdir_" + str(ii))
                cmd = ("train_folder.py --root_dir " + fold_name + " --config "
                       + fold_name + "/" + fname + " --file_format=" + file_format
                       + " --keep_data_order=True"
                       + " --classification_threshold=0.01"
                       + " --output_dir=" + outdir_name)
                print(cmd)
                os.system(cmd)
                # Read the predictions ALIGNN wrote for the test split.
                test_csv = outdir_name + "/prediction_results_test_set.csv"
                df = pd.read_csv(test_csv)
                target_vals = df.target.values
                # NOTE(review): classification predictions are read but never
                # recorded with task.record — results are discarded; confirm.
                id_vals = df.id.values
        # Regression tasks
        # TODO: shorten the script by taking out repetitive lines
        if not classification:
            maes = []
            for ii, fold in enumerate(task.folds):
                train_df = task.get_train_and_val_data(fold, as_type="df")
                test_df = task.get_test_data(fold, include_target=True, as_type="df")
                # Name of the target property
                target = [
                    col for col in train_df.columns
                    if col not in ("id", "structure", "composition")
                ][0]
                # Making sure there are no spaces or parentheses which
                # can cause issues while creating the folder.
                fold_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") + "_fold_" + str(ii))
                if not os.path.exists(fold_name):
                    os.makedirs(fold_name)
                os.chdir(fold_name)
                # ALIGNN requires the id_prop.csv file
                f = open("id_prop.csv", "w")
                for jj, j in train_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                # There is no pre-defined validation split, so we will use
                # a portion of training set as validation set, and
                # keep test set intact
                val_df = train_df[0:len(test_df)]
                for jj, j in val_df.iterrows():
                    # for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                for jj, j in test_df.iterrows():
                    id = j.name
                    atoms = pmg_to_atoms(j.structure)
                    pos_name = id
                    atoms.write_poscar(pos_name)
                    val = j[target]
                    line = str(pos_name) + "," + str(val) + "\n"
                    f.write(line)
                # Patch the config template with this fold's split sizes.
                n_train = len(train_df)
                n_val = len(val_df)
                n_test = len(test_df)
                config = loadjson(config_template)
                config["n_train"] = n_train
                config["n_val"] = n_val
                config["n_test"] = n_test
                config["keep_data_order"] = True
                config["batch_size"] = 32
                config["epochs"] = 500
                fname = "config_fold_" + str(ii) + ".json"
                dumpjson(data=config, filename=fname)
                f.close()
                os.chdir("..")
                outdir_name = (task.dataset_name + "_" + target.replace(
                    " ", "_").replace("(", "-").replace(")", "-") + "_outdir_" + str(ii))
                cmd = ("train_folder.py --root_dir " + fold_name + " --config "
                       + fold_name + "/" + fname + " --file_format=" + file_format
                       + " --keep_data_order=True"
                       + " --output_dir=" + outdir_name)
                print(cmd)
                os.system(cmd)
                # Read back test-set predictions and score this fold.
                test_csv = outdir_name + "/prediction_results_test_set.csv"
                df = pd.read_csv(test_csv)
                target_vals = df.target.values
                # id_vals = df.id.values
                pred_vals = df.prediction.values
                mae = mean_absolute_error(target_vals, pred_vals)
                maes.append(mae)
                task.record(fold, pred_vals, params=config)
                print(
                    "Dataset_name, Fold, MAE=",
                    task.dataset_name,
                    fold,
                    mean_absolute_error(target_vals, pred_vals),
                )
            # Summary over all folds of this regression task.
            maes = np.array(maes)
            print(maes, np.mean(maes), np.std(maes))
            print()
            print()
            print()
pred_vals = [True if i == 1 else False for i in pred_vals] results[fold] = pred_vals if regression: maes = np.array(maes) print(key, maes, np.mean(maes), np.std(maes)) if not regression: roc_aucs = np.array(roc_aucs) print(key, roc_aucs, np.mean(roc_aucs), np.std(roc_aucs)) return results if __name__ == "__main__": config_template = os.path.abspath( os.path.join(os.path.dirname(__file__), "config_example.json")) config = loadjson(config_template) train_tasks(mb=mb, config_template=config_template, file_format="poscar") run_dir = "." # run_dir = "/wrk/knc6/matbench/benchmarks/matbench_v0.1_alignn" cwd = os.getcwd() os.chdir(run_dir) results = defaultdict() for task in mb.tasks: task.load() task_name = task.dataset_name regr = True if "is" in task_name:
def runjob(self):
    """Provide main function for running a generic VASP calculation.

    Runs (and re-runs up to ``self.attempts`` times) a VASP job in a
    directory named after ``self.jobname``, applying surface/dipole
    corrections for "Surf" structures, and caches the converged result
    in ``<jobname>.json`` so repeated calls return immediately.

    Returns:
        (final_energy, contcar_path) — energy is a float when a run
        completed, the string "na" otherwise.
    """
    # poscar=self.poscar
    # incar=self.incar
    # kpoints=self.kpoints
    # copy_files=self.copy_files
    # cwd = str(os.getcwd())
    # NOTE(review): `jobname` (local) is only bound when self.jobname is
    # empty, yet it is read unconditionally in the ELAST check below —
    # NameError risk when self.jobname is set; confirm intended behavior.
    if self.jobname == "":
        jobname = str(self.poscar.comment)
    # job_dir = str(self.jobname)
    run_file = (str(os.getcwd()) + str("/") + str(self.jobname) +
                str(".json"))
    run_dir = str(os.getcwd()) + str("/") + str(self.jobname)
    # Surface slabs: force a single k-point along the vacuum direction.
    if self.poscar.comment.startswith("Surf"):
        [a, b, c] = self.kpoints.kpts[0]
        # self.kpoints.kpts = [[a, b, 1]]
        self.kpoints = Kpoints3D(kpoints=[[a, b, 1]])
        try:
            pol = self.poscar.atoms.check_polar
            if pol:
                # Polar slab: turn on VASP dipole corrections centered at
                # the center of mass.
                COM = self.poscar.atoms.get_center_of_mass()
                print("COM=", COM)
                print("Found polar surface,setting dipole corrections")
                # NOTE(review): DIPOL uses COM[0], COM[2], COM[2] —
                # COM[1] looks intended for the middle component; confirm.
                self.incar.update({
                    "LDIPOL": ".TRUE.",
                    "IDIPOL": 3,
                    "ISYM": 0,
                    "DIPOL": str(COM[0]) + str(" ") + str(COM[2]) +
                    str(" ") + str(COM[2]),
                })
                print(
                    "Polar surface encountered in run_job",
                    self.poscar.comment,
                )
        except Exception:
            pass
    wait = False
    json_file = str(self.jobname) + str(".json")
    print(
        "json should be here=",
        str(os.getcwd()) + str("/") + str(json_file),
    )
    print("json should be=", json_file, run_file, os.getcwd())
    # Fast path: a previous converged run left a JSON summary — reuse it.
    if os.path.exists(str(os.getcwd()) + str("/") + str(json_file)):
        try:
            data_cal = loadjson(
                str(os.getcwd()) + str("/") + str(json_file))
            tmp_outcar = (str(os.getcwd()) + str("/") +
                          str(json_file.split(".json")[0]) +
                          str("/OUTCAR"))
            print("outcar is", tmp_outcar)
            wait = Outcar(tmp_outcar).converged  # True
            print("outcar status", wait)
            if wait:
                f_energy = data_cal[0]["final_energy"]
                contcar = (str(os.getcwd()) + str("/") +
                           str(json_file.split(".json")[0]) +
                           str("/CONTCAR"))
                return f_energy, contcar
        except Exception:
            pass
    # Retry loop: `wait` doubles as "converged" flag; give up after
    # self.attempts tries.
    attempt = 0
    while not wait:
        attempt = attempt + 1
        if attempt == self.attempts:
            wait = True
        # print("Setting up POTCAR")
        # if self.potcar is None:
        #     new_symb = list(set(self.poscar.atoms.elements))
        #     self.potcar = Potcar(elements=new_symb, pot_type=self.pot_type)
        if not os.path.exists(run_dir):
            # Fresh job: create the directory and seed the POSCAR.
            print("Starting new job")
            os.makedirs(run_dir)
            os.chdir(run_dir)
            self.poscar.write_file("POSCAR")
        else:
            # Restart: reuse the old directory and continue from CONTCAR.
            os.chdir(run_dir)
            if os.path.isfile("OUTCAR"):
                try:
                    wait = Outcar(
                        "OUTCAR"
                    ).converged  # Vasprun("vasprun.xml").converged
                    # wait=Vasprun("vasprun.xml").converged
                except Exception:
                    pass
                try:
                    self.potcar.write_file("POTCAR")
                    print("FOUND OLD CONTCAR in", os.getcwd())
                    copy_cmd = str("cp CONTCAR POSCAR")
                    self.poscar.write_file("POSCAR")
                    # pos = Poscar.from_file("CONTCAR")
                    print("copy_cmd=", copy_cmd)
                    if ("ELAST" not in jobname
                            and "LEPSILON" not in jobname):
                        # Because in ELASTIC calculations
                        # structures are deformed
                        os.system(copy_cmd)
                    # time.sleep(3)
                except Exception:
                    pass
        # Write the remaining VASP inputs and any extra files, then run.
        self.incar.write_file("INCAR")
        self.potcar.write_file("POTCAR")
        self.kpoints.write_file("KPOINTS")
        for i in self.copy_files:
            print("copying", i)
            shutil.copy2(i, "./")
        self.run()  # .wait()
        print("Queue 1")
        # Re-check convergence after the run.
        if os.path.isfile("OUTCAR"):
            try:
                wait = Outcar(
                    "OUTCAR").converged  # Vasprun("vasprun.xml").converged
            except Exception:
                pass
        print("End of the first loop", os.getcwd(), wait)
        f_energy = "na"
        # enp = "na"
        contcar = str(os.getcwd()) + str("/") + str("CONTCAR")
        final_str = Poscar.from_file(contcar).atoms
        vrun = Vasprun("vasprun.xml")
        f_energy = float(vrun.final_energy)
        # enp = float(f_energy) / float(final_str.num_atoms)
        # natoms = final_str.num_atoms
        os.chdir("../")
        if wait:
            # Converged (or attempts exhausted): cache the summary JSON.
            data_cal = []
            data_cal.append({
                "jobname": self.jobname,
                "poscar": self.poscar.atoms.to_dict(),
                "incar": self.incar.to_dict(),
                "kpoints": self.kpoints.to_dict(),
                "final_energy": (f_energy),
                "contcar": final_str.to_dict(),
            })
            json_file = str(self.jobname) + str(".json")
            f_json = open(json_file, "w")
            f_json.write(json.dumps(data_cal))
            f_json.close()
    print("Wrote json file", f_energy)
    return f_energy, contcar