def add_structure(self, source, name=None, identifier=None, fmt=None):
    """Add a structure to the mpfile.

    The structure is matched against the Materials Project (MP) through
    ``MPRester.find_structure`` and stored in ``self.document`` under
    ``[identifier, mp_level01_titles[3], <key>]``.

    Args:
        source: a pymatgen ``Structure``, a dict accepted by
            ``Structure.from_dict``, the path of an existing file, or a
            string holding the structure in the format given by ``fmt``.
        name: optional key for the structure; must be a string without
            dots. Defaults to the formula derived from the composition.
        identifier: optional identifier to file the structure under. If
            MP returns matches it has to be one of them; if omitted, the
            first match (or the formula when nothing matches) is used.
        fmt: format handed to ``Structure.from_str``; required when
            ``source`` is a structure string.

    Returns:
        The identifier the structure was stored under.

    Raises:
        ValueError: if ``source`` is of an unsupported type, ``fmt`` is
            missing for a string source, ``name`` is not a dot-free
            string, no API key is configured, or ``identifier`` is not
            among the MP matches.
    """
    from pymatgen import Structure, MPRester

    if isinstance(source, Structure):
        structure = source
    elif isinstance(source, dict):
        structure = Structure.from_dict(source)
    else:
        # os.path.exists() raises for objects that are not path-like, which
        # used to surface as a stray TypeError and made the "not supported"
        # branch below unreachable. Treat such objects as "not a file".
        try:
            is_file = os.path.exists(source)
        except (TypeError, ValueError):
            is_file = False

        if is_file:
            structure = Structure.from_file(source, sort=True)
        elif isinstance(source, six.string_types):
            if fmt is None:
                raise ValueError("Need fmt to get structure from string!")
            structure = Structure.from_str(source, fmt, sort=True)
        else:
            raise ValueError(source, "not supported!")

    if name is not None:
        if not isinstance(name, six.string_types):
            raise ValueError("structure name needs to be a string")
        elif "." in name:
            raise ValueError("structure name cannot contain dots (.)")

    mpr = MPRester()
    if not mpr.api_key:
        raise ValueError(
            "API key not set. Run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`."
        )

    matched_mpids = mpr.find_structure(structure)
    formula = get_composition_from_string(structure.composition.formula)

    if not matched_mpids:
        # No MP match: fall back to the formula unless the caller forces an
        # identifier explicitly.
        if identifier is None:
            identifier = formula
            print(
                "Structure not found in MP! Please submit via MPComplete to "
                "obtain mp-id or manually choose an anchor mp-id! Continuing "
                "with {} as identifier!".format(identifier))
        else:
            print("Structure not found in MP! Forcing {} as identifier!".
                  format(identifier))
    elif identifier is None:
        identifier = matched_mpids[0]
        if len(matched_mpids) > 1:
            print("Multiple matching structures found in MP. Using",
                  identifier)
    elif identifier not in matched_mpids:
        msg = "Structure does not match {} but instead {}!".format(
            identifier, matched_mpids)
        raise ValueError(msg)

    # Structures already stored for this identifier (looked up once).
    existing = self.document.get(identifier, {}).get(mp_level01_titles[3], {})
    idx = len(existing)
    sub_key = formula if name is None else name
    if sub_key in existing:
        # Disambiguate a key that is already taken with the running count.
        sub_key += "_{}".format(idx)

    self.document.rec_update(
        nest_dict(structure.as_dict(),
                  [identifier, mp_level01_titles[3], sub_key]))
    return identifier
def test(self, structure):
    """Check ``structure`` against the filters enabled on this instance.

    Each enabled filter (``is_valid``, ``potcar_exists``, ``max_natoms``,
    ``is_ordered``, ``not_in_MP``) that the structure violates records a
    failure.

    Args:
        structure: the structure to check; the code reads ``is_valid()``,
            ``composition.elements``, ``num_sites`` and ``is_ordered``.

    Returns:
        bool: True if no enabled filter failed, False otherwise.
    """
    failures = []

    if self.is_valid and not structure.is_valid():
        failures.append("IS_VALID=False")

    if self.potcar_exists:
        elements = structure.composition.elements
        if set(elements).intersection(set(self.NO_POTCARS)):
            failures.append("POTCAR_EXISTS=False")

    if self.max_natoms and structure.num_sites > self.max_natoms:
        failures.append("MAX_NATOMS=Exceeded")

    if self.is_ordered and not structure.is_ordered:
        failures.append("IS_ORDERED=False")

    if self.not_in_MP:
        mpr = MPRester(self.MAPI_KEY)
        mpids = mpr.find_structure(structure)
        if mpids:
            if self.require_bandstructure:
                # Only matches that actually have a band structure count
                # as "already in MP".
                for mpid in mpids:
                    try:
                        bs = mpr.get_bandstructure_by_material_id(mpid)
                    except Exception:
                        # Best-effort lookup: a failed request is treated
                        # as "no band structure". Unlike a bare ``except``,
                        # KeyboardInterrupt/SystemExit still propagate.
                        continue
                    if bs:
                        failures.append("NOT_IN_MP=False ({})".format(mpid))
            else:
                failures.append("NOT_IN_MP=False ({})".format(mpids[0]))

    return not failures
def add_structure(self, source, name=None, identifier=None, fmt=None):
    """Add a structure to the mpfile.

    The structure has to be known to the Materials Project (MP): it is
    matched through ``MPRester.find_structure`` and stored in
    ``self.document`` under ``[identifier, mp_level01_titles[3], <key>]``.

    Args:
        source: a pymatgen ``Structure``, a dict accepted by
            ``Structure.from_dict``, the path of an existing file, or a
            string holding the structure in the format given by ``fmt``.
        name: optional key for the structure. Defaults to ``s<N>`` where
            ``N`` is the number of structures already stored for the
            identifier.
        identifier: optional identifier to file the structure under; has
            to be one of the MP matches. Defaults to the first match.
        fmt: format handed to ``Structure.from_str``; required when
            ``source`` is a structure string.

    Raises:
        ValueError: if ``source`` is of an unsupported type, ``fmt`` is
            missing for a string source, no API key is configured, the
            structure is not found in MP, or ``identifier`` is not among
            the MP matches.
    """
    if isinstance(source, Structure):
        structure = source
    elif isinstance(source, dict):
        structure = Structure.from_dict(source)
    else:
        # os.path.exists() raises for objects that are not path-like, which
        # used to surface as a stray TypeError and made the "not supported"
        # branch below unreachable. Treat such objects as "not a file".
        try:
            is_file = os.path.exists(source)
        except (TypeError, ValueError):
            is_file = False

        if is_file:
            structure = Structure.from_file(source)
        elif isinstance(source, six.string_types):
            if fmt is None:
                raise ValueError('Need fmt to get structure from string!')
            structure = Structure.from_str(source, fmt)
        else:
            raise ValueError(source, 'not supported!')

    mpr = MPRester()
    if not mpr.api_key:
        raise ValueError(
            'API key not set. Run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`.'
        )

    matched_mpids = mpr.find_structure(structure)
    if not matched_mpids:
        raise ValueError(
            'Structure not found in MP. Please submit via MPComplete to obtain mp-id!'
        )
    elif identifier is None:
        identifier = matched_mpids[0]
        if len(matched_mpids) > 1:
            print('Multiple matching structures found in MP. Using',
                  identifier)
    elif identifier not in matched_mpids:
        raise ValueError(
            'Structure does not match {} but instead {}'.format(
                identifier, matched_mpids))

    # Number of structures already stored for this identifier; used to
    # generate the default key.
    idx = len(
        self.document.get(identifier, {}).get(mp_level01_titles[3], {}))
    sub_key = 's{}'.format(idx) if name is None else name
    self.document.rec_update(
        nest_dict(structure.as_dict(),
                  [identifier, mp_level01_titles[3], sub_key]))
class MaterialsEhullBuilder(AbstractBuilder):
    """Builder that adds stability information, formation energies and
    Materials Project IDs to an existing materials collection."""

    def __init__(self, materials_write, mapi_key=None, update_all=False):
        """
        Starting with an existing materials collection, adds stability information and
        The Materials Project ID.

        Args:
            materials_write: mongodb collection for materials (write access needed)
            mapi_key: (str) Materials API key (if MAPI_KEY env. var. not set)
            update_all: (bool) - if true, updates all docs. If false, only updates
                docs w/o a stability key
        """
        self._materials = materials_write
        self.mpr = MPRester(api_key=mapi_key)
        self.update_all = update_all

    def run(self):
        """
        Process every material that has a ``thermo.energy`` (and, unless
        ``update_all`` is set, no ``stability`` key yet) and write
        ``stability``, ``thermo.formation_energy_per_atom`` and ``mpids``
        back to the collection. Failures on a single material are logged
        and do not stop the run.
        """
        logger.info("MaterialsEhullBuilder starting...")
        self._build_indexes()

        q = {"thermo.energy": {"$exists": True}}
        if not self.update_all:
            q["stability"] = {"$exists": False}

        mats = list(self._materials.find(q, {
            "calc_settings": 1,
            "structure": 1,
            "thermo.energy": 1,
            "material_id": 1
        }))

        # Lowest energy per atom among the MP entries of each element.
        # Looked up once per run instead of once per element per material.
        elemental_min_e = {}

        pbar = tqdm(mats)
        for m in pbar:
            pbar.set_description("Processing materials_id: {}".format(
                m['material_id']))
            try:
                params = {
                    x: m["calc_settings"][x]
                    for x in ["is_hubbard", "hubbards", "potcar_spec"]
                }

                structure = Structure.from_dict(m["structure"])
                energy = m["thermo"]["energy"]
                my_entry = ComputedEntry(structure.composition,
                                         energy,
                                         parameters=params)

                # TODO: @computron This only calculates Ehull with respect to Materials Project.
                # It should also account for the current database's results. -computron
                self._materials.update_one({"material_id": m["material_id"]}, {
                    "$set": {
                        "stability": self.mpr.get_stability([my_entry])[0]
                    }
                })

                # TODO: @computron: also add additional properties like inverse hull energy?

                # TODO: @computron it's better to use PD tool or reaction energy calculator
                # Otherwise the compatibility schemes might have issues...one strategy might be
                # use MP only to retrieve entries but compute the PD locally -computron
                for el, elx in my_entry.composition.items():
                    if el.symbol not in elemental_min_e:
                        entries = self.mpr.get_entries(el.symbol,
                                                       compatible_only=True)
                        elemental_min_e[el.symbol] = min(
                            entries,
                            key=lambda x: x.energy_per_atom).energy_per_atom
                    energy -= elx * elemental_min_e[el.symbol]

                self._materials.update_one({"material_id": m["material_id"]}, {
                    "$set": {
                        "thermo.formation_energy_per_atom":
                        energy / structure.num_sites
                    }
                })

                mpids = self.mpr.find_structure(structure)
                self._materials.update_one({"material_id": m["material_id"]},
                                           {"$set": {
                                               "mpids": mpids
                                           }})

            except Exception:
                # ``Exception`` (not a bare except) so that Ctrl-C and
                # SystemExit still stop the run. logger.exception() already
                # attaches the traceback, so a single record is enough.
                logger.exception(
                    "There was an error processing material_id: {}".format(
                        m["material_id"]))

        logger.info("MaterialsEhullBuilder finished processing.")

    def reset(self):
        """Remove the ``stability`` key from all documents and rebuild indexes."""
        logger.info("Resetting MaterialsEhullBuilder")
        self._materials.update_many({}, {"$unset": {"stability": 1}})
        self._build_indexes()
        logger.info("Finished resetting MaterialsEhullBuilder")

    def _build_indexes(self):
        """Create the index on ``stability.e_above_hull``."""
        self._materials.create_index("stability.e_above_hull")

    @classmethod
    def from_file(cls, db_file, m="materials", **kwargs):
        """
        Get a MaterialsEhullBuilder using only a db file

        Args:
            db_file: (str) path to db file
            m: (str) name of "materials" collection
            **kwargs: other parameters to feed into the builder, e.g. mapi_key
        """
        db_write = get_database(db_file, admin=True)
        return cls(db_write[m], **kwargs)
class MaterialsEhullBuilder(AbstractBuilder):
    """Builder that adds stability information and Materials Project IDs
    to an existing materials collection."""

    def __init__(self, materials_write, mapi_key=None, update_all=False):
        """
        Starting with an existing materials collection, adds stability information and
        The Materials Project ID.

        Args:
            materials_write: mongodb collection for materials (write access needed)
            mapi_key: (str) Materials API key (if MAPI_KEY env. var. not set)
            update_all: (bool) - if true, updates all docs. If false, only updates
                docs w/o a stability key
        """
        self._materials = materials_write
        self.mpr = MPRester(api_key=mapi_key)
        self.update_all = update_all

    def run(self):
        """
        Process every material that has a ``thermo.energy`` (and, unless
        ``update_all`` is set, no ``stability`` key yet) and write
        ``stability`` and ``mpids`` back to the collection. Failures on a
        single material are printed and do not stop the run.
        """
        print("MaterialsEhullBuilder starting...")
        self._build_indexes()

        q = {"thermo.energy": {"$exists": True}}
        if not self.update_all:
            q["stability"] = {"$exists": False}

        mats = list(self._materials.find(q, {
            "calc_settings": 1,
            "structure": 1,
            "thermo.energy": 1,
            "material_id": 1
        }))

        pbar = tqdm(mats)
        for m in pbar:
            pbar.set_description("Processing materials_id: {}".format(
                m['material_id']))
            try:
                params = {
                    x: m["calc_settings"][x]
                    for x in ["is_hubbard", "hubbards", "potcar_spec"]
                }

                structure = Structure.from_dict(m["structure"])
                energy = m["thermo"]["energy"]
                my_entry = ComputedEntry(structure.composition,
                                         energy,
                                         parameters=params)
                self._materials.update_one({"material_id": m["material_id"]}, {
                    "$set": {
                        "stability": self.mpr.get_stability([my_entry])[0]
                    }
                })

                mpids = self.mpr.find_structure(structure)
                self._materials.update_one({"material_id": m["material_id"]},
                                           {"$set": {
                                               "mpids": mpids
                                           }})

            except Exception:
                # ``Exception`` (not a bare except) so that Ctrl-C and
                # SystemExit still stop the run.
                import traceback
                print("<---")
                # Report the id only; the full document (structure included)
                # would drown the traceback that follows.
                print("There was an error processing material_id: {}".format(
                    m["material_id"]))
                traceback.print_exc()
                print("--->")

        print("MaterialsEhullBuilder finished processing.")

    def reset(self):
        """Remove the ``stability`` key from all documents and rebuild indexes."""
        self._materials.update_many({}, {"$unset": {"stability": 1}})
        self._build_indexes()

    def _build_indexes(self):
        """Create the index on ``stability.e_above_hull``."""
        self._materials.create_index("stability.e_above_hull")

    @classmethod
    def from_file(cls, db_file, m="materials", **kwargs):
        """
        Get a MaterialsEhullBuilder using only a db file

        Args:
            db_file: (str) path to db file
            m: (str) name of "materials" collection
            **kwargs: other parameters to feed into the builder, e.g. mapi_key
        """
        db_write = get_database(db_file, admin=True)
        return cls(db_write[m], **kwargs)
class MaterialsEhullBuilder:
    """Builder that adds stability information and Materials Project IDs
    to an existing materials collection."""

    def __init__(self, materials_write, mapi_key=None, update_all=False):
        """
        Starting with an existing materials collection, adds stability information and
        The Materials Project ID.

        Args:
            materials_write: mongodb collection for materials (write access needed)
            mapi_key: (str) Materials API key (if MAPI_KEY env. var. not set)
            update_all: (bool) - if true, updates all docs. If false, only updates
                docs w/o a stability key
        """
        self._materials = materials_write
        self.mpr = MPRester(api_key=mapi_key)
        self.update_all = update_all

    def run(self):
        """
        Process every material that has a ``thermo.energy`` (and, unless
        ``update_all`` is set, no ``stability`` key yet) and write
        ``stability`` and ``mpids`` back to the collection. Failures on a
        single material are printed and do not stop the run.
        """
        print("MaterialsEhullBuilder starting...")
        self._build_indexes()

        q = {"thermo.energy": {"$exists": True}}
        if not self.update_all:
            q["stability"] = {"$exists": False}

        mats = list(self._materials.find(
            q, {"calc_settings": 1, "structure": 1, "thermo.energy": 1, "material_id": 1}))

        pbar = tqdm(mats)
        for m in pbar:
            pbar.set_description("Processing materials_id: {}".format(m['material_id']))
            try:
                params = {
                    x: m["calc_settings"][x]
                    for x in ["is_hubbard", "hubbards", "potcar_spec"]
                }

                structure = Structure.from_dict(m["structure"])
                energy = m["thermo"]["energy"]
                my_entry = ComputedEntry(structure.composition, energy, parameters=params)
                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {"stability": self.mpr.get_stability([my_entry])[0]}})

                mpids = self.mpr.find_structure(structure)
                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {"mpids": mpids}})

            except Exception:
                # ``Exception`` (not a bare except) so that Ctrl-C and
                # SystemExit still stop the run.
                import traceback
                print("<---")
                # Report the id only; the full document (structure included)
                # would drown the traceback that follows.
                print("There was an error processing material_id: {}".format(
                    m["material_id"]))
                traceback.print_exc()
                print("--->")

        print("MaterialsEhullBuilder finished processing.")

    def reset(self):
        """Remove the ``stability`` key from all documents and rebuild indexes."""
        self._materials.update_many({}, {"$unset": {"stability": 1}})
        self._build_indexes()

    def _build_indexes(self):
        """Create the index on ``stability.e_above_hull``."""
        self._materials.create_index("stability.e_above_hull")

    @staticmethod
    def from_db_file(db_file, m="materials", **kwargs):
        """
        Get a MaterialsEhullBuilder using only a db file

        Args:
            db_file: (str) path to db file
            m: (str) name of "materials" collection
            **kwargs: other parameters to feed into the builder, e.g. mapi_key
        """
        db_write = get_database(db_file, admin=True)
        return MaterialsEhullBuilder(db_write[m], **kwargs)
project=project, _skip=skip, _limit=250, _fields=["identifier"]).result() existing += [c["identifier"] for c in contribs["data"]] has_more = contribs["has_more"] uploaded = len(existing) print(uploaded, "already uploaded.") if idx < uploaded: continue # structure = Structure.from_dict(input_prim) if config["use_identifier"]: structure = input_prim matches = mpr.find_structure(structure) if not matches: print("no match for idx", idx) matches = [str(idx)] identifier = matches[0] if identifier in existing: continue if identifier in contributions: print(idx, identifier, "already parsed") continue contrib = { "project": project, "identifier": identifier, "data": { target: targ_prop
class MaterialsEhullBuilder(AbstractBuilder):
    """Builder that adds stability information, formation energies and
    Materials Project IDs to an existing materials collection."""

    def __init__(self, materials_write, mapi_key=None, update_all=False):
        """
        Starting with an existing materials collection, adds stability information and
        The Materials Project ID.

        Args:
            materials_write: mongodb collection for materials (write access needed)
            mapi_key: (str) Materials API key (if MAPI_KEY env. var. not set)
            update_all: (bool) - if true, updates all docs. If false, only updates
                docs w/o a stability key
        """
        self._materials = materials_write
        self.mpr = MPRester(api_key=mapi_key)
        self.update_all = update_all

    def run(self):
        """
        Process every material that has a ``thermo.energy`` (and, unless
        ``update_all`` is set, no ``stability`` key yet) and write
        ``stability``, ``thermo.formation_energy_per_atom`` and ``mpids``
        back to the collection. Failures on a single material are logged
        and do not stop the run.
        """
        logger.info("MaterialsEhullBuilder starting...")
        self._build_indexes()

        q = {"thermo.energy": {"$exists": True}}
        if not self.update_all:
            q["stability"] = {"$exists": False}

        mats = list(self._materials.find(
            q, {"calc_settings": 1, "structure": 1, "thermo.energy": 1, "material_id": 1}))

        # Lowest energy per atom among the MP entries of each element.
        # Looked up once per run instead of once per element per material.
        elemental_min_e = {}

        pbar = tqdm(mats)
        for m in pbar:
            pbar.set_description("Processing materials_id: {}".format(m['material_id']))
            try:
                params = {
                    x: m["calc_settings"][x]
                    for x in ["is_hubbard", "hubbards", "potcar_spec"]
                }

                structure = Structure.from_dict(m["structure"])
                energy = m["thermo"]["energy"]
                my_entry = ComputedEntry(structure.composition, energy, parameters=params)

                # TODO: @computron This only calculates Ehull with respect to Materials Project.
                # It should also account for the current database's results. -computron
                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {"stability": self.mpr.get_stability([my_entry])[0]}})

                # TODO: @computron: also add additional properties like inverse hull energy?

                # TODO: @computron it's better to use PD tool or reaction energy calculator
                # Otherwise the compatibility schemes might have issues...one strategy might be
                # use MP only to retrieve entries but compute the PD locally -computron
                for el, elx in my_entry.composition.items():
                    if el.symbol not in elemental_min_e:
                        entries = self.mpr.get_entries(el.symbol, compatible_only=True)
                        elemental_min_e[el.symbol] = min(
                            entries, key=lambda x: x.energy_per_atom).energy_per_atom
                    energy -= elx * elemental_min_e[el.symbol]

                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {"thermo.formation_energy_per_atom": energy / structure.num_sites}})

                mpids = self.mpr.find_structure(structure)
                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {"mpids": mpids}})

            except Exception:
                # ``Exception`` (not a bare except) so that Ctrl-C and
                # SystemExit still stop the run. logger.exception() already
                # attaches the traceback, so a single record is enough.
                logger.exception(
                    "There was an error processing material_id: {}".format(
                        m["material_id"]))

        logger.info("MaterialsEhullBuilder finished processing.")

    def reset(self):
        """Remove the ``stability`` key from all documents and rebuild indexes."""
        logger.info("Resetting MaterialsEhullBuilder")
        self._materials.update_many({}, {"$unset": {"stability": 1}})
        self._build_indexes()
        logger.info("Finished resetting MaterialsEhullBuilder")

    def _build_indexes(self):
        """Create the index on ``stability.e_above_hull``."""
        self._materials.create_index("stability.e_above_hull")

    @classmethod
    def from_file(cls, db_file, m="materials", **kwargs):
        """
        Get a MaterialsEhullBuilder using only a db file

        Args:
            db_file: (str) path to db file
            m: (str) name of "materials" collection
            **kwargs: other parameters to feed into the builder, e.g. mapi_key
        """
        db_write = get_database(db_file, admin=True)
        return cls(db_write[m], **kwargs)