def e_above(formula, energy):
    """Return the energy above hull of a trial compound.

    A phase diagram is built from every Materials Project entry found in the
    chemical system spanned by ``formula``; a ``PDEntry`` made from
    ``formula`` and ``energy`` is then evaluated against it.

    Args:
        formula: Chemical formula accepted by ``Composition``,
            e.g. 'Li6AsN'.
        energy: Energy assigned to the trial entry, e.g. -27.53.

    Returns:
        The value reported by ``get_e_above_hull`` for the trial entry.
    """
    composition = Composition(formula)
    trial_entry = PDEntry(Composition(formula), energy)
    chemsys = list(composition.as_dict())

    # "API_KEY" is a placeholder: create an account on materialsproject.org
    # to obtain a real key.
    rester = MPRester("API_KEY")

    # Entries are the basic unit for thermodynamic and other analyses in
    # pymatgen; this fetches every entry in the chemical system of `formula`.
    competing_entries = rester.get_entries_in_chemsys(chemsys)

    phase_diagram = PD(competing_entries)
    return phase_diagram.get_e_above_hull(trial_entry)
def test_from_dict(self):
    """Check Composition.from_dict on plain symbols and on an as_dict round trip."""
    amounts = {"Fe": 6, "O": 8}
    built = Composition.from_dict(amounts)
    self.assertEqual(
        built.reduced_formula,
        "Fe3O4",
        "Creation form sym_amount dictionary failed!",
    )

    # Species carrying oxidation states must survive as_dict -> from_dict.
    original = Composition({"Fe2+": 2, "Fe3+": 4, "O2-": 8})
    restored = Composition.from_dict(original.as_dict())
    self.assertEqual(original, restored)
def test_from_dict(self):
    """Composition.from_dict must reduce Fe6O8 to Fe3O4 and invert as_dict."""
    from_symbols = Composition.from_dict({"Fe": 6, "O": 8})
    self.assertEqual(
        from_symbols.reduced_formula,
        "Fe3O4",
        "Creation form sym_amount dictionary failed!",
    )

    # Round trip a composition whose species carry oxidation states.
    with_oxidation_states = Composition({"Fe2+": 2, "Fe3+": 4, "O2-": 8})
    round_tripped = Composition.from_dict(with_oxidation_states.as_dict())
    self.assertEqual(with_oxidation_states, round_tripped)
def to_reduced_dict(self):
    """
    Returns:
        dict with element symbol and reduced amount e.g.,
        {"Fe": 2.0, "O":3.0}, plus a 'charge' key holding the stored
        charge (the charge itself is copied as is, not rescaled).
    """
    # Re-parse the reduced formula so the amounts are those of one
    # reduced formula unit.
    reduced = Composition(self._composition.reduced_formula).as_dict()
    reduced['charge'] = self._charge
    return reduced
def mlmd(x):
    """Summarise the amounts in a formula as a one-row DataFrame.

    Args:
        x: Formula (anything ``Composition`` accepts).

    Returns:
        pandas.DataFrame with a single row and six positional columns (0-5):
        ``num_atoms`` of the composition, largest amount, smallest amount,
        mean amount, variance of the amounts (``np.var`` default, ddof=0)
        and the number of entries in ``Composition(x).as_dict()``.
    """
    comp = Composition(x)
    amounts = np.array(list(comp.as_dict().values()))
    row = [
        comp.num_atoms,
        max(amounts),
        min(amounts),
        np.mean(amounts),
        np.var(amounts),
        len(amounts),
    ]
    return pd.DataFrame([row])
def formula2onehot_matrix(formula, l=8):
    """Encode a formula as a one-hot (element x amount) array.

    Args:
        formula: Formula string understood by ``Composition``.
        l: Number of amount columns; also the largest amount allowed.

    Returns:
        Array of shape (1, len(build_entry()), l, 1) with a 1 at
        [row of the element, int(amount) - 1] for each element, or None
        (after a warning) when any amount exceeds ``l``.
    """
    # build_entry() is defined elsewhere; it is indexed by element symbol and
    # its length fixes the number of rows.
    element_rows = build_entry()
    amounts = Composition(formula).as_dict()

    if not max(amounts.values()) <= l:
        warnings.warn('The number of single element in composition '
                      'can not surpass {}.'.format(l))
        return None

    encoded = np.zeros((len(element_rows), l))
    for symbol, amount in amounts.items():
        # NOTE(review): an amount below 1 gives column index -1, which numpy
        # wraps to the last column - confirm fractional amounts never occur.
        encoded[element_rows[symbol], int(amount) - 1] = 1

    # Add a trailing axis, then a leading axis.
    return np.expand_dims(np.expand_dims(encoded, -1), 0)
def get_onehot_matrix(l=8):
    """Build one-hot arrays for every usable formula in the bundled CSV.

    Reads the ``pretty_formula`` column of 'data/mpid_formula_sp.csv',
    de-duplicates it, and encodes each formula whose largest amount does not
    exceed ``l``.

    Args:
        l: Number of amount columns; also the largest amount allowed.

    Returns:
        dict mapping formula -> array of shape (len(build_entry()), l, 1).
        Formulas with an amount above ``l`` are silently left out.
    """
    # build_entry() is defined elsewhere; it is indexed by element symbol and
    # its length fixes the number of rows.
    element_rows = build_entry()
    table = pd.read_csv('data/mpid_formula_sp.csv')
    unique_formulas = list(set(table.pretty_formula.values))

    encoded_by_formula = {}
    for formula in unique_formulas:
        # Float cells are skipped (presumably NaN from empty CSV fields).
        if isinstance(formula, float):
            continue

        amounts = Composition(formula).as_dict()
        if not max(amounts.values()) <= l:
            continue

        grid = np.zeros((len(element_rows), l))
        for symbol, amount in amounts.items():
            grid[element_rows[symbol], int(amount) - 1] = 1
        encoded_by_formula[formula] = np.expand_dims(grid, -1)

    return encoded_by_formula
def formula_query_dict(query_string):
    """Turn a formula into a Mongo filter on the ``formula_charge`` field.

    Li is removed from the query composition first. A regex pre-filter then
    selects stored ``formula_charge`` values that contain every remaining
    element name, and ``comp_comp`` (defined elsewhere) decides which of
    those candidates are kept.

    Args:
        query_string: Formula string understood by ``Composition``.

    Returns:
        dict of the form {"formula_charge": {"$in": [...]}}.
    """
    amounts = Composition(query_string).as_dict()
    amounts.pop('Li', None)
    query_comp = Composition.from_dict(amounts)

    # One look-ahead per element: each name must occur somewhere in the
    # string, in any order.
    lookaheads = ''.join(
        f"(?={'.*'}{element.name})" for element in query_comp.elements
    )
    pattern = lookaheads + '.*'

    cursor = mongo_coll.find({"formula_charge": {"$regex": pattern}})
    candidates = cursor.distinct("formula_charge")

    matching = [
        candidate
        for candidate in candidates
        if comp_comp(query_comp, Composition(candidate))
    ]
    return {"formula_charge": {"$in": matching}}
class Entry(MSONable):
    """
    A lightweight Entry object containing key computed data
    for storage purposes.
    """

    def __init__(self, composition, calculator, inputs, data, entry_id=None,
                 attribute=None, tag=None):
        """
        Initializes an Entry.

        Args:
            composition (Composition): Composition of the entry. For
                flexibility, this can take the form of all the typical input
                taken by a Composition, including a {symbol: amt} dict,
                a string formula, and others.
            calculator: Stored as is and shown in the repr.
            inputs (dict): A dict of parameters associated with the entry.
                May be None.
            data (dict): A dict of any additional data associated with the
                entry. May be None.
            entry_id (obj): An optional id to uniquely identify the entry.
            attribute: Optional attribute of the entry. This can be used to
                specify that the entry is a newly found compound, or to
                specify a particular label for the entry. Used for further
                analysis and plotting purposes. An attribute can be anything
                but must be MSONable.
            tag: Optional tag, stored as is.
        """
        self.composition = Composition(composition)
        self.calculator = calculator
        self.inputs = inputs
        self.data = data
        self.entry_id = entry_id
        self.name = self.composition.reduced_formula
        self.attribute = attribute
        self.tag = tag

    # NOTE: a field-by-field __eq__ was sketched here at some point but is
    # disabled; this class does not define its own equality.

    @property
    def number_element(self):
        """Length of the composition (``len(self.composition)``)."""
        return len(self.composition)

    def __repr__(self):
        output = [
            "Entry {} - {}".format(self.entry_id, self.composition.formula),
            "calculator: {}".format(self.calculator)
        ]
        return "\n".join(output)

    def __str__(self):
        return self.__repr__()

    @staticmethod
    def _decode_mapping(decoder, mapping):
        """Decode every value of ``mapping``; pass ``None`` through as is."""
        if mapping is None:
            return None
        return {k: decoder.process_decoded(v) for k, v in mapping.items()}

    @classmethod
    def from_dict(cls, d):
        """
        Rebuild an Entry from the dict produced by as_dict().

        "inputs" and "data" default to an empty dict when the key is absent.
        A stored None (as_dict() emits it when the attribute is None) is
        restored as None instead of failing on ``None.items()``.
        """
        dec = MontyDecoder()
        return cls(d["composition"], d["calculator"],
                   inputs=cls._decode_mapping(dec, d.get("inputs", {})),
                   data=cls._decode_mapping(dec, d.get("data", {})),
                   entry_id=d.get("entry_id", None),
                   attribute=d.get("attribute"),
                   tag=d.get("tag"))

    def as_dict(self):
        """
        Returns:
            MSONable dict. "inputs" and "data" go through a JSON round trip
            with MontyEncoder so that only JSON-compatible values remain.
        """
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "composition": self.composition.as_dict(),
            "calculator": self.calculator,
            "inputs": json.loads(json.dumps(self.inputs, cls=MontyEncoder)),
            "data": json.loads(json.dumps(self.data, cls=MontyEncoder)),
            "entry_id": self.entry_id,
            "attribute": self.attribute,
            "tag": self.tag
        }
# Report whether the two 0.3k frames expose the same columns in the same
# order; the concatenations below run either way.
if bd_AML_0_3k.columns.tolist() == bd_0_3k.columns.tolist():
    print('True')
else:
    print(str(bd_AML_0_3k.columns.tolist()))
    print(str(bd_0_3k.columns.tolist()))

bdBOOST_4_train_300p = pd.concat([bd_0_3k, bd_AML_0_3k])
bdBOOST_4_train_1000p = pd.concat([bd_1k, bd_AML_1k])

# Keep only rows whose formula parses and holds more than one element key.
avail_formula_list = []
id_list = []
eg_list = []
for row_id, formula, Eg in zip(bdBOOST_4_train_1000p["id"],
                               bdBOOST_4_train_1000p["composition"],
                               bdBOOST_4_train_1000p["Eg"]):
    try:
        p = Composition(formula)
        n_elements = len(p.as_dict())
    except Exception:
        # Best effort: unparsable formulas are dropped. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        continue
    if n_elements > 1:
        avail_formula_list.append(formula)
        id_list.append(row_id)
        eg_list.append(Eg)

d = {'id': id_list, 'composition': avail_formula_list, 'Eg': eg_list}
df = pd.DataFrame(data=d)
# When a composition occurs several times, the first occurrence wins.
df = df.drop_duplicates(subset='composition', keep='first')
df.to_csv('./bdBOOST_train_init1000.csv', index=False, header=True,
          columns=["id", "composition", "Eg"])
bd_1k.to_csv('bd_original_train_init1000.csv', index=False, header=True,
             columns=["id", "composition", "Eg"])
class ComputedEntry(PDEntry, MSONable):
    """
    Lightweight container for the key results of a calculation.

    It extends PDEntry so that it can be used for phase diagram generation.
    Compared with a standard PDEntry it additionally stores an energy
    correction, run parameters and arbitrary extra data.
    """

    def __init__(self, composition, energy, correction=0.0, parameters=None,
                 data=None, entry_id=None, attribute=None):
        """
        Initializes a ComputedEntry.

        Args:
            composition (Composition): Composition of the entry. Anything a
                Composition accepts works: a {symbol: amt} dict, a string
                formula, and others.
            energy (float): Energy of the entry, stored uncorrected. Usually
                the final calculated energy from VASP or other electronic
                structure codes.
            correction (float): A correction to be applied to the energy.
                Defaults to 0.0.
            parameters (dict): Optional parameters associated with the entry.
                Falsy values are replaced by an empty dict.
            data (dict): Optional additional data associated with the entry.
                Falsy values are replaced by an empty dict.
            entry_id (obj): An optional id to uniquely identify the entry.
            attribute: Optional attribute of the entry, e.g. a label used
                for further analysis and plotting. Must be MSONable.
        """
        self.uncorrected_energy = energy
        self.composition = Composition(composition)
        self.correction = correction
        self.parameters = parameters or {}
        self.data = data or {}
        self.entry_id = entry_id
        self.name = self.composition.reduced_formula
        self.attribute = attribute

    @property
    def energy(self):
        """
        Returns the *corrected* energy of the entry
        (uncorrected energy plus correction).
        """
        return self.uncorrected_energy + self.correction

    def __repr__(self):
        lines = [
            "ComputedEntry {}".format(self.composition.formula),
            "Energy = {:.4f}".format(self.uncorrected_energy),
            "Correction = {:.4f}".format(self.correction),
            "Parameters:",
        ]
        lines.extend("{} = {}".format(key, value)
                     for key, value in self.parameters.items())
        lines.append("Data:")
        lines.extend("{} = {}".format(key, value)
                     for key, value in self.data.items())
        return "\n".join(lines)

    def __str__(self):
        return self.__repr__()

    @classmethod
    def from_dict(cls, d):
        """Rebuild a ComputedEntry from the dict produced by as_dict()."""
        decoder = MontyDecoder()
        composition = d["composition"]
        energy = d["energy"]
        correction = d["correction"]
        parameters = decoder.process_decoded(d.get("parameters", {}))
        data = decoder.process_decoded(d.get("data", {}))
        return cls(composition, energy, correction, parameters, data,
                   entry_id=d.get("entry_id"),
                   attribute=d.get("attribute"))

    def as_dict(self):
        """
        Returns:
            MSONable dict. "energy" holds the *uncorrected* energy;
            "parameters" and "data" go through a JSON round trip with
            MontyEncoder.
        """
        encoded_parameters = json.loads(
            json.dumps(self.parameters, cls=MontyEncoder))
        encoded_data = json.loads(json.dumps(self.data, cls=MontyEncoder))
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "energy": self.uncorrected_energy,
            "composition": self.composition.as_dict(),
            "correction": self.correction,
            "parameters": encoded_parameters,
            "data": encoded_data,
            "entry_id": self.entry_id,
            "attribute": self.attribute,
        }
class PDEntry(PMGSONable):
    """
    An object encompassing all relevant data for phase diagrams.

    .. attribute:: name

        A name for the entry. This is the string shown in the phase diagrams.
        By default, this is the reduced formula for the composition, but can
        be set to some other string for display purposes.

    Args:
        composition: Anything accepted by the Composition constructor.
        energy: Energy for composition.
        name: Optional parameter to name the entry. Defaults to the reduced
            chemical formula.
        attribute: Optional attribute of the entry. This can be used to
            specify that the entry is a newly found compound, or to specify
            a particular label for the entry. Used for further analysis and
            plotting purposes. An attribute can be anything but must be
            PMGSONable.
    """

    def __init__(self, composition, energy, name=None, attribute=None):
        self.energy = energy
        self.composition = Composition(composition)
        # A falsy name (None, "") falls back to the reduced formula.
        self.name = name or self.composition.reduced_formula
        self.attribute = attribute

    @property
    def energy_per_atom(self):
        """
        Returns the energy divided by the number of atoms.
        """
        n_atoms = self.composition.num_atoms
        return self.energy / n_atoms

    @property
    def is_element(self):
        """
        True if the entry is an element.
        """
        return self.composition.is_element

    def __repr__(self):
        template = "PDEntry : {} with energy = {:.4f}"
        return template.format(self.composition, self.energy)

    def __str__(self):
        return self.__repr__()

    def as_dict(self):
        """Returns a dict with the entry's composition, energy, name and attribute."""
        serialized = {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }
        serialized["composition"] = self.composition.as_dict()
        serialized["energy"] = self.energy
        serialized["name"] = self.name
        serialized["attribute"] = self.attribute
        return serialized

    @classmethod
    def from_dict(cls, d):
        """Rebuild a PDEntry from the dict produced by as_dict()."""
        composition = Composition(d["composition"])
        return cls(composition, d["energy"], d["name"], d.get("attribute"))
class ComputedEntry(MSONable):
    """
    Lightweight container for the key results of a calculation.

    Besides composition and energy it stores an energy correction, the run
    parameters and arbitrary extra data.
    """

    def __init__(self,
                 composition: Composition,
                 energy: float,
                 correction: float = 0.0,
                 parameters: dict = None,
                 data: dict = None,
                 entry_id: object = None):
        """
        Initializes a ComputedEntry.

        Args:
            composition (Composition): Composition of the entry. Anything a
                Composition accepts works: a {symbol: amt} dict, a string
                formula, and others.
            energy (float): Energy of the entry, stored uncorrected. Usually
                the final calculated energy from VASP or other electronic
                structure codes.
            correction (float): A correction to be applied to the energy.
                Defaults to 0.0.
            parameters (dict): Optional parameters associated with the entry.
                Falsy values are replaced by an empty dict.
            data (dict): Optional additional data associated with the entry.
                Falsy values are replaced by an empty dict.
            entry_id (obj): An optional id to uniquely identify the entry.
        """
        self.uncorrected_energy = energy
        self.composition = Composition(composition)
        self.correction = correction
        self.parameters = parameters or {}
        self.data = data or {}
        self.entry_id = entry_id
        self.name = self.composition.reduced_formula

    def normalize(self, mode: str = "formula_unit") -> None:
        """
        Normalize the entry's composition, energy and correction in place.

        Args:
            mode: "formula_unit" is the default, which normalizes to
                composition.reduced_formula. The other option is "atom",
                which normalizes such that the composition amounts sum to 1.
                Any other value is handled like "formula_unit".
        """
        if mode == "atom":
            divisor = self.composition.num_atoms
            scaled = self.composition / divisor
        else:
            scaled, divisor = \
                self.composition.get_reduced_composition_and_factor()

        # Energy and correction shrink by the same factor as the composition.
        # ``self.name`` is left as it was set in __init__.
        self.composition = scaled
        self.uncorrected_energy /= divisor
        self.correction /= divisor

    @property
    def is_element(self) -> bool:
        """
        :return: Whether composition of entry is an element.
        """
        return self.composition.is_element

    @property
    def energy(self) -> float:
        """
        :return: the *corrected* energy of the entry.
        """
        return self.uncorrected_energy + self.correction

    @property
    def energy_per_atom(self) -> float:
        """
        :return: the *corrected* energy per atom of the entry.
        """
        n_atoms = self.composition.num_atoms
        return self.energy / n_atoms

    def __repr__(self):
        lines = [
            "ComputedEntry {} - {}".format(self.entry_id,
                                           self.composition.formula),
            "Energy = {:.4f}".format(self.uncorrected_energy),
            "Correction = {:.4f}".format(self.correction),
            "Parameters:",
        ]
        lines.extend("{} = {}".format(key, value)
                     for key, value in self.parameters.items())
        lines.append("Data:")
        lines.extend("{} = {}".format(key, value)
                     for key, value in self.data.items())
        return "\n".join(lines)

    def __str__(self):
        return self.__repr__()

    @classmethod
    def from_dict(cls, d) -> 'ComputedEntry':
        """
        :param d: Dict representation.
        :return: ComputedEntry
        """
        decoder = MontyDecoder()
        composition = d["composition"]
        energy = d["energy"]
        correction = d["correction"]
        # Values are decoded one by one so nested MSONable objects come back.
        parameters = {
            key: decoder.process_decoded(value)
            for key, value in d.get("parameters", {}).items()
        }
        data = {
            key: decoder.process_decoded(value)
            for key, value in d.get("data", {}).items()
        }
        return cls(composition, energy, correction,
                   parameters=parameters,
                   data=data,
                   entry_id=d.get("entry_id"))

    def as_dict(self) -> dict:
        """
        :return: MSONable dict. "energy" holds the *uncorrected* energy.
        """
        encoded_parameters = json.loads(
            json.dumps(self.parameters, cls=MontyEncoder))
        encoded_data = json.loads(json.dumps(self.data, cls=MontyEncoder))
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "energy": self.uncorrected_energy,
            "composition": self.composition.as_dict(),
            "correction": self.correction,
            "parameters": encoded_parameters,
            "data": encoded_data,
            "entry_id": self.entry_id,
        }
bd_AML_30_BOOST = pd.read_csv(
    '/home/glard/AML/roost/roost/examples/prepared_training_data/bandgap4new_model.csv'
)
# When a composition occurs several times, the first occurrence wins; ids
# are then renumbered 1..N over the remaining rows.
bd_AML_30_BOOST = bd_AML_30_BOOST.drop_duplicates(subset=['composition'],
                                                  keep='first')
bd_AML_30_BOOST['id'] = list(range(1, bd_AML_30_BOOST["composition"].size + 1))

# Keep rows whose formula parses, has more than one element key, and whose
# amounts are all below 9.
avail_formula_list = []
id_list = []
eg_list = []
for row_id, formula, Eg in zip(bd_AML_30_BOOST["id"],
                               bd_AML_30_BOOST["composition"],
                               bd_AML_30_BOOST["Eg"]):
    try:
        p = Composition(formula)
        amounts = p.as_dict()
    except Exception:
        # Best effort: unparsable formulas are dropped. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        continue
    if len(amounts) > 1 and all(x < 9 for x in amounts.values()):
        avail_formula_list.append(formula)
        id_list.append(row_id)
        eg_list.append(Eg)

d = {'id': id_list, 'composition': avail_formula_list, 'Eg': eg_list}
df = pd.DataFrame(data=d)
df.to_csv('./bd_AML_whole_for_autoencoder.csv', index=False, header=True,
          columns=["id", "composition", "Eg"])
import pandas as pd
from pymatgen.core.composition import Composition
import random
import tensorflow as tf

# Script fragment: one-hot encode the formulas of a CSV, draw a random sample
# of them, and expose the decoder half of a saved autoencoder as its own model.
# `np` and `build_entry` are not imported in the visible part of the file.

# build_entry() is indexed by element symbol below and its length sets the
# number of rows of every encoded matrix.
onehot = build_entry()
# Earlier data source, kept for reference: 'data/mpid_formula_sp.csv'
df = pd.read_csv('./cleaned_formula.csv')
formulas = df.pretty_formula.values
formulas = list(set(formulas))  # de-duplicate
data = {}  # formula -> array of shape (len(onehot), 8, 1)
for c in formulas:
    # Float cells are skipped (presumably NaN from empty CSV fields).
    if isinstance(c, float):
        continue
    obj = Composition(c)
    d = obj.as_dict()
    # Only formulas whose largest amount fits the 8 amount columns are kept.
    if max(d.values()) <= 8:
        matrix = np.zeros((len(onehot), 8))
        for symbol in d.keys():
            # Column index is the integer amount minus one.
            matrix[onehot[symbol], int(d[symbol]) - 1] = 1
        matrix = np.expand_dims(matrix, -1)
        data[c] = matrix
# Draw 5000 distinct formulas; random.sample raises ValueError when fewer
# than 5000 formulas were encoded.
pretty_formula = random.sample(list(data), 5000)
onehot_from_formula = [data[k] for k in pretty_formula]
# Load the trained autoencoder and build a model that runs from the input of
# its 'decoder' layer to the output of that layer's 'output' sub-layer.
model = tf.keras.models.load_model('models/best_autoencoder.h5')
inputs = model.get_layer('decoder').input
outputs = model.get_layer('decoder').get_layer('output').output
decoder = tf.keras.models.Model(inputs=inputs, outputs=outputs)
# Counter initialised here; it is used by code outside this fragment.
fail_num = 0
class Ion(PMGSONable):
    """
    Basic ion object. It is just a Composition object with an additional
    variable to store charge.
    The net charge can either be represented as Mn++, or Mn+2, or Mn[2+].
    Note the order of the sign and magnitude in each representation.
    """

    def __init__(self, composition, charge=0.0, properties=None):
        """
        Flexible Ion construction, similar to Composition.
        For more information, please see pymatgen.core.Composition

        Args:
            composition: Anything accepted by the Composition constructor.
            charge: Net charge of the ion. Defaults to 0.0.
            properties: Optional dict of extra properties, exposed as
                attributes through __getattr__.
        """
        self._composition = Composition(composition)
        self._charge = charge
        self._properties = properties if properties else {}

    def __getattr__(self, a):
        """
        Resolve unknown attributes from the properties dict first, then from
        the wrapped Composition. Any failure of the Composition lookup is
        reported as AttributeError(a).
        """
        # NOTE(review): if `_properties` itself is missing (an instance
        # created without __init__), this lookup re-enters __getattr__ -
        # confirm whether copy/pickle paths need a guard.
        if a in self._properties:
            return self._properties[a]
        try:
            return getattr(self._composition, a)
        except Exception:
            # Was a bare `except:`, which also converted KeyboardInterrupt
            # and SystemExit into AttributeError.
            raise AttributeError(a)

    @staticmethod
    def from_formula(formula):
        """
        Build an Ion from a formula string that carries its charge.

        The charge may be given in square brackets ("[2+]") or as sign-first
        suffixes ("+2", "++"); an "(aq)" marker is dropped. All charge tokens
        found are summed and removed before the rest is parsed as a
        Composition.
        """
        charge = 0.0
        f = formula
        # Bracketed form: magnitude (optional) followed by the sign.
        m = re.search(r"\[([^\[\]]+)\]", f)
        if m:
            m_chg = re.search(r"([\.\d]*)([+-])", m.group(1))
            if m_chg:
                if m_chg.group(1) != "":
                    charge += float(m_chg.group(1)) * \
                        (float(m_chg.group(2) + "1"))
                else:
                    charge += float(m_chg.group(2) + "1")
            f = f.replace(m.group(), "", 1)
        m = re.search(r"\(aq\)", f)
        if m:
            f = f.replace(m.group(), "", 1)
        # Sign-first form: each sign, with an optional magnitude after it.
        # finditer scans the string as it was when the loop started.
        for m_chg in re.finditer(r"([+-])([\.\d]*)", f):
            sign = m_chg.group(1)
            sgn = float(str(sign + "1"))
            if m_chg.group(2).strip() != "":
                charge += float(m_chg.group(2)) * sgn
            else:
                charge += sgn
            f = f.replace(m_chg.group(), "", 1)
        composition = Composition(f)
        return Ion(composition, charge)

    @property
    def formula(self):
        """
        Returns the Composition's formula string (e.g., Li4 Fe4 P4 O16)
        followed by the signed charge when it is non-zero.
        """
        formula = self._composition.formula
        chg_str = ""
        if self._charge > 0:
            chg_str = " +" + formula_double_format(self._charge, False)
        elif self._charge < 0:
            chg_str = " " + formula_double_format(self._charge, False)
        return formula + chg_str

    @property
    def anonymized_formula(self):
        """
        An anonymized formula. Appends the charge, truncated to an integer,
        to the end of the anonymized composition.
        """
        anon_formula = self._composition.anonymized_formula
        chg = self._charge
        chg_str = ""
        if chg > 0:
            chg_str += ("{}{}".format('+', str(int(chg))))
        elif chg < 0:
            chg_str += ("{}{}".format('-', str(int(np.abs(chg)))))
        return anon_formula + chg_str

    @property
    def reduced_formula(self):
        """
        Returns a reduced formula string with appended charge. The charge is
        divided by the same factor as the composition; a zero charge is
        rendered as "(aq)".
        """
        reduced_formula = self._composition.reduced_formula
        charge = self._charge / float(
            self._composition.get_reduced_composition_and_factor()[1])
        if charge > 0:
            if abs(charge) == 1:
                chg_str = "[+]"
            else:
                chg_str = "[" + formula_double_format(charge, False) + "+]"
        elif charge < 0:
            if abs(charge) == 1:
                chg_str = "[-]"
            else:
                chg_str = "[{}-]".format(
                    formula_double_format(abs(charge), False))
        else:
            chg_str = "(aq)"
        return reduced_formula + chg_str

    @property
    def alphabetical_formula(self):
        """
        Returns the Composition's alphabetical formula with the signed
        charge appended when it is non-zero.
        """
        alph_formula = self._composition.alphabetical_formula
        chg_str = ""
        if self._charge > 0:
            chg_str = " +" + formula_double_format(self._charge, False)
        elif self._charge < 0:
            chg_str = " " + formula_double_format(self._charge, False)
        return alph_formula + chg_str

    @property
    def charge(self):
        """
        Charge of the ion
        """
        return self._charge

    @property
    def composition(self):
        """
        Return composition object
        """
        return self._composition

    def as_dict(self):
        """
        Returns:
            dict with composition, as well as charge
        """
        d = self._composition.as_dict()
        d['charge'] = self._charge
        return d

    @classmethod
    def from_dict(cls, d):
        """
        Generates an ion object from a dict created by as_dict().

        Args:
            d: {symbol: amount} dict with an additional 'charge' key.
        """
        charge = d['charge']
        # Every key other than 'charge' is a composition entry.
        composition = Composition({i: d[i] for i in d if i != 'charge'})
        return Ion(composition, charge)

    @property
    def to_reduced_dict(self):
        """
        Returns:
            dict with element symbol and reduced amount e.g.,
            {"Fe": 2.0, "O":3.0}, plus a 'charge' key holding the stored
            (not rescaled) charge.
        """
        reduced_formula = self._composition.reduced_formula
        c = Composition(reduced_formula)
        d = c.as_dict()
        d['charge'] = self._charge
        return d

    def __eq__(self, other):
        if self.composition != other.composition:
            return False
        if self.charge != other.charge:
            return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def __add__(self, other):
        """
        Addition of two ions.
        """
        new_composition = self.composition + other.composition
        new_charge = self.charge + other.charge
        return Ion(new_composition, new_charge)

    def __sub__(self, other):
        """
        Subtraction of two ions
        """
        new_composition = self.composition - other.composition
        new_charge = self.charge - other.charge
        return Ion(new_composition, new_charge)

    def __mul__(self, other):
        """
        Multiplication of an Ion with a factor
        """
        new_composition = self.composition * other
        new_charge = self.charge * other
        return Ion(new_composition, new_charge)

    def __hash__(self):
        # For now, just use the composition hash code; ions that differ only
        # in charge share a hash and are told apart by __eq__.
        return self._composition.__hash__()

    def __len__(self):
        return len(self._composition)

    def __str__(self):
        return self.formula

    def __repr__(self):
        return "Ion: " + self.formula

    def __getitem__(self, el):
        # Missing elements yield 0 rather than raising.
        return self._composition.get(el, 0)
class Ion(MSONable):
    """
    Basic ion object. It is just a Composition object with an additional
    variable to store charge.
    The net charge can either be represented as Mn++, or Mn+2, or Mn[2+].
    Note the order of the sign and magnitude in each representation.
    """

    def __init__(self, composition, charge=0.0, properties=None):
        """
        Flexible Ion construction, similar to Composition.
        For more information, please see pymatgen.core.Composition

        Args:
            composition: Anything accepted by the Composition constructor.
            charge: Net charge of the ion. Defaults to 0.0.
            properties: Optional dict of extra properties, exposed as
                attributes through __getattr__.
        """
        self._composition = Composition(composition)
        self._charge = charge
        self._properties = properties if properties else {}

    def __getattr__(self, a):
        """
        Resolve unknown attributes from the properties dict first, then from
        the wrapped Composition. Any failure of the Composition lookup is
        reported as AttributeError(a).
        """
        # NOTE(review): if `_properties` itself is missing (an instance
        # created without __init__), this lookup re-enters __getattr__ -
        # confirm whether copy/pickle paths need a guard.
        if a in self._properties:
            return self._properties[a]
        try:
            return getattr(self._composition, a)
        except Exception:
            # Was a bare `except:`, which also converted KeyboardInterrupt
            # and SystemExit into AttributeError.
            raise AttributeError(a)

    @staticmethod
    def from_formula(formula):
        """
        Build an Ion from a formula string that carries its charge.

        The charge may be given in square brackets ("[2+]") or as sign-first
        suffixes ("+2", "++"); an "(aq)" marker is dropped. All charge tokens
        found are summed and removed before the rest is parsed as a
        Composition.
        """
        charge = 0.0
        f = formula
        # Bracketed form: magnitude (optional) followed by the sign.
        m = re.search(r"\[([^\[\]]+)\]", f)
        if m:
            m_chg = re.search(r"([\.\d]*)([+-])", m.group(1))
            if m_chg:
                if m_chg.group(1) != "":
                    charge += float(m_chg.group(1)) * \
                        (float(m_chg.group(2) + "1"))
                else:
                    charge += float(m_chg.group(2) + "1")
            f = f.replace(m.group(), "", 1)
        m = re.search(r"\(aq\)", f)
        if m:
            f = f.replace(m.group(), "", 1)
        # Sign-first form: each sign, with an optional magnitude after it.
        # finditer scans the string as it was when the loop started.
        for m_chg in re.finditer(r"([+-])([\.\d]*)", f):
            sign = m_chg.group(1)
            sgn = float(str(sign + "1"))
            if m_chg.group(2).strip() != "":
                charge += float(m_chg.group(2)) * sgn
            else:
                charge += sgn
            f = f.replace(m_chg.group(), "", 1)
        composition = Composition(f)
        return Ion(composition, charge)

    @property
    def formula(self):
        """
        Returns the Composition's formula string (e.g., Li4 Fe4 P4 O16)
        followed by the signed charge when it is non-zero.
        """
        formula = self._composition.formula
        chg_str = ""
        if self._charge > 0:
            chg_str = " +" + formula_double_format(self._charge, False)
        elif self._charge < 0:
            chg_str = " " + formula_double_format(self._charge, False)
        return formula + chg_str

    @property
    def anonymized_formula(self):
        """
        An anonymized formula. Appends the charge, truncated to an integer,
        to the end of the anonymized composition.
        """
        anon_formula = self._composition.anonymized_formula
        chg = self._charge
        chg_str = ""
        if chg > 0:
            chg_str += ("{}{}".format('+', str(int(chg))))
        elif chg < 0:
            chg_str += ("{}{}".format('-', str(int(np.abs(chg)))))
        return anon_formula + chg_str

    @property
    def reduced_formula(self):
        """
        Returns a reduced formula string with appended charge. The charge is
        divided by the same factor as the composition; a zero charge is
        rendered as "(aq)".
        """
        reduced_formula = self._composition.reduced_formula
        charge = self._charge / float(
            self._composition.get_reduced_composition_and_factor()[1])
        if charge > 0:
            if abs(charge) == 1:
                chg_str = "[+]"
            else:
                chg_str = "[" + formula_double_format(charge, False) + "+]"
        elif charge < 0:
            if abs(charge) == 1:
                chg_str = "[-]"
            else:
                chg_str = "[{}-]".format(
                    formula_double_format(abs(charge), False))
        else:
            chg_str = "(aq)"
        return reduced_formula + chg_str

    @property
    def alphabetical_formula(self):
        """
        Returns the Composition's alphabetical formula with the signed
        charge appended when it is non-zero.
        """
        alph_formula = self._composition.alphabetical_formula
        chg_str = ""
        if self._charge > 0:
            chg_str = " +" + formula_double_format(self._charge, False)
        elif self._charge < 0:
            chg_str = " " + formula_double_format(self._charge, False)
        return alph_formula + chg_str

    @property
    def charge(self):
        """
        Charge of the ion
        """
        return self._charge

    @property
    def composition(self):
        """
        Return composition object
        """
        return self._composition

    def as_dict(self):
        """
        Returns:
            dict with composition, as well as charge
        """
        d = self._composition.as_dict()
        d['charge'] = self._charge
        return d

    @classmethod
    def from_dict(cls, d):
        """
        Generates an ion object from a dict created by as_dict().

        Args:
            d: {symbol: amount} dict with an additional 'charge' key.
        """
        charge = d['charge']
        # Every key other than 'charge' is a composition entry.
        composition = Composition({i: d[i] for i in d if i != 'charge'})
        return Ion(composition, charge)

    @property
    def to_reduced_dict(self):
        """
        Returns:
            dict with element symbol and reduced amount e.g.,
            {"Fe": 2.0, "O":3.0}, plus a 'charge' key holding the stored
            (not rescaled) charge.
        """
        reduced_formula = self._composition.reduced_formula
        c = Composition(reduced_formula)
        d = c.as_dict()
        d['charge'] = self._charge
        return d

    def __eq__(self, other):
        if self.composition != other.composition:
            return False
        if self.charge != other.charge:
            return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def __add__(self, other):
        """
        Addition of two ions.
        """
        new_composition = self.composition + other.composition
        new_charge = self.charge + other.charge
        return Ion(new_composition, new_charge)

    def __sub__(self, other):
        """
        Subtraction of two ions
        """
        new_composition = self.composition - other.composition
        new_charge = self.charge - other.charge
        return Ion(new_composition, new_charge)

    def __mul__(self, other):
        """
        Multiplication of an Ion with a factor
        """
        new_composition = self.composition * other
        new_charge = self.charge * other
        return Ion(new_composition, new_charge)

    def __hash__(self):
        # For now, just use the composition hash code; ions that differ only
        # in charge share a hash and are told apart by __eq__.
        return self._composition.__hash__()

    def __len__(self):
        return len(self._composition)

    def __str__(self):
        return self.formula

    def __repr__(self):
        return "Ion: " + self.formula

    def __getitem__(self, el):
        # Missing elements yield 0 rather than raising.
        return self._composition.get(el, 0)
class Entry(MSONable, metaclass=ABCMeta):
    """
    Lightweight pairing of a chemical composition with its energy.

    This is an abstract base class and is not meant to be instantiated
    directly. Subclasses must define an ``energy`` property.
    """

    def __init__(self, composition: Composition, energy: float):
        """
        Initializes an Entry.

        Args:
            composition (Composition): Composition of the entry. Anything a
                Composition accepts works: a {symbol: amt} dict, a string
                formula, and others.
            energy (float): Energy of the entry.
        """
        self._energy = energy
        self.composition = Composition(composition)

    @property
    def is_element(self) -> bool:
        """
        :return: Whether composition of entry is an element.
        """
        return self.composition.is_element

    @property
    @abstractmethod
    def energy(self) -> float:
        """
        :return: the energy of the entry.
        """

    @property
    def energy_per_atom(self) -> float:
        """
        :return: the energy per atom of the entry.
        """
        n_atoms = self.composition.num_atoms
        return self.energy / n_atoms

    def __str__(self):
        return self.__repr__()

    def normalize(
        self, mode: str = "formula_unit", inplace: bool = True
    ) -> Optional["Entry"]:
        """
        Normalize the entry's composition and energy.

        Args:
            mode: "formula_unit" is the default, which normalizes to
                composition.reduced_formula. The other option is "atom",
                which normalizes such that the composition amounts sum to 1.
            inplace: True (the default) normalizes this object and returns
                None. False leaves this object untouched and returns a
                normalized deep copy.
        """
        # Work on self or on a deep copy; the arithmetic is the same.
        target = self if inplace else copy.deepcopy(self)
        factor = target._normalization_factor(mode)
        target.composition /= factor
        target._energy /= factor
        return None if inplace else target

    def _normalization_factor(self, mode: str = "formula_unit") -> float:
        """Return the divisor for ``mode``; any mode other than "atom" is
        treated as "formula_unit"."""
        if mode == "atom":
            return self.composition.num_atoms
        _, factor = self.composition.get_reduced_composition_and_factor()
        return factor

    def as_dict(self) -> dict:
        """
        :return: MSONable dict.
        """
        serialized = {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }
        serialized["energy"] = self._energy
        serialized["composition"] = self.composition.as_dict()
        return serialized
class PDEntry(MSONable):
    """
    An object encompassing all relevant data for phase diagrams.

    .. attribute:: name

        A name for the entry. This is the string shown in the phase diagrams.
        By default, this is the reduced formula for the composition, but can
        be set to some other string for display purposes.

    Args:
        composition: Anything accepted by the Composition constructor.
        energy: Energy for composition.
        name: Optional parameter to name the entry. Defaults to the reduced
            chemical formula.
        attribute: Optional attribute of the entry. This can be used to
            specify that the entry is a newly found compound, or to specify
            a particular label for the entry. Used for further analysis and
            plotting purposes. An attribute can be anything but must be
            MSONable.
    """

    def __init__(self, composition, energy, name=None, attribute=None):
        self.energy = energy
        self.composition = Composition(composition)
        # A falsy name (None, "") falls back to the reduced formula.
        self.name = name or self.composition.reduced_formula
        self.attribute = attribute

    @property
    def energy_per_atom(self):
        """
        Returns the energy divided by the number of atoms.
        """
        n_atoms = self.composition.num_atoms
        return self.energy / n_atoms

    @property
    def is_element(self):
        """
        True if the entry is an element.
        """
        return self.composition.is_element

    def __repr__(self):
        template = "PDEntry : {} with energy = {:.4f}"
        return template.format(self.composition, self.energy)

    def __str__(self):
        return self.__repr__()

    def as_dict(self):
        """Returns a dict with the entry's composition, energy, name and attribute."""
        serialized = {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
        }
        serialized["composition"] = self.composition.as_dict()
        serialized["energy"] = self.energy
        serialized["name"] = self.name
        serialized["attribute"] = self.attribute
        return serialized

    @classmethod
    def from_dict(cls, d):
        """Rebuild a PDEntry from the dict produced by as_dict()."""
        composition = Composition(d["composition"])
        return cls(composition, d["energy"], d["name"], d.get("attribute"))
class Entry(MSONable, metaclass=ABCMeta):
    """
    A lightweight object containing the energy associated with
    a specific chemical composition. This base class is not
    intended to be instantiated directly. Note that classes
    which inherit from Entry must define a .energy property.
    """

    def __init__(
        self,
        composition: Union[Composition, str, Dict[str, float]],
        energy: float,
    ):
        """
        Initializes an Entry.

        Args:
            composition (Composition): Composition of the entry. For
                flexibility, this can take the form of all the typical input
                taken by a Composition, including a {symbol: amt} dict,
                a string formula, and others.
            energy (float): Energy of the entry.
        """
        self._composition = Composition(composition)
        self._energy = energy

    @property
    def is_element(self) -> bool:
        """
        :return: Whether composition of entry is an element.
        """
        # NOTE _composition rather than composition as GrandPDEntry
        # edge case exists if we have a compound where chempots are
        # given for all bar one element type
        return self._composition.is_element

    @property
    def composition(self) -> Composition:
        """
        :return: the composition of the entry.
        """
        return self._composition

    @property
    @abstractmethod
    def energy(self) -> float:
        """
        :return: the energy of the entry.
        """

    @property
    def energy_per_atom(self) -> float:
        """
        :return: the energy per atom of the entry.
        """
        return self.energy / self.composition.num_atoms

    def __repr__(self):
        return f"{self.__class__.__name__} : {self.composition} with energy = {self.energy:.4f}"

    def __str__(self):
        return self.__repr__()

    def normalize(
            self,
            mode: Literal["formula_unit", "atom"] = "formula_unit") -> "Entry":
        """
        Normalize the entry's composition and energy.

        The entry itself is left untouched: a new entry is rebuilt through
        as_dict()/from_dict() with the scaled composition and energy.

        Args:
            mode ("formula_unit" | "atom"): "formula_unit" (the default)
                normalizes to composition.reduced_formula. "atom" normalizes
                such that the composition amounts sum to 1.

        Returns:
            The normalized entry.

        Raises:
            ValueError: if mode is neither "formula_unit" nor "atom".
        """
        factor = self._normalization_factor(mode)
        new_composition = self._composition / factor
        new_energy = self._energy / factor

        new_entry_dict = self.as_dict()
        new_entry_dict["composition"] = new_composition.as_dict()
        new_entry_dict["energy"] = new_energy

        return self.from_dict(new_entry_dict)

    def _normalization_factor(
            self,
            mode: Literal["formula_unit", "atom"] = "formula_unit") -> float:
        """
        :return: the number both composition and energy are divided by
            when normalizing with the given mode.
        """
        # NOTE here we use composition rather than _composition in order to ensure
        # that we have the expected behavior downstream in cases where composition
        # is overwritten (GrandPotPDEntry, TransformedPDEntry)
        if mode == "atom":
            factor = self.composition.num_atoms
        elif mode == "formula_unit":
            factor = self.composition.get_reduced_composition_and_factor()[1]
        else:
            raise ValueError(
                f"{mode} is not an allowed option for normalization")

        return factor

    def as_dict(self) -> dict:
        """
        :return: MSONable dict.
        """
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "energy": self._energy,
            "composition": self._composition.as_dict(),
        }

    def __eq__(self, other):
        # NOTE: Scaled duplicates i.e. physically equivalent materials
        # are not equal unless normalized separately.
        if self is other:
            return True

        # Objects that expose no energy/composition (None, strings, ...)
        # cannot be compared. Returning NotImplemented lets Python fall back
        # to its default handling instead of leaking an AttributeError out of
        # `entry == something` or `entry in mixed_list`.
        try:
            other_energy = other.energy
            other_composition = other.composition
        except AttributeError:
            return NotImplemented

        # Equality is defined based on composition and energy
        # If structures are involved, it is assumed that a {composition, energy} is
        # vanishingly unlikely to be the same if the structures are different
        if not np.allclose(self.energy, other_energy):
            return False

        return self.composition == other_composition

    def __hash__(self):
        # NOTE truncate _energy to 8 dp to ensure same robustness
        # as np.allclose
        return hash(
            f"{self.__class__.__name__}{self._composition.formula}{self._energy:.8f}"
        )
class Entry(MSONable, metaclass=ABCMeta):
    """
    Minimal pairing of a chemical composition with an energy.

    This is an abstract base: subclasses are expected to supply the
    ``energy`` property, and the class is not meant to be instantiated
    on its own.
    """

    def __init__(
        self,
        composition: Composition,
        energy: float,
    ):
        """
        Initializes an Entry.

        Args:
            composition (Composition): Composition of the entry. The value is
                passed through ``Composition(...)``, so a {symbol: amt} dict,
                a formula string, etc. are accepted as well.
            energy (float): Energy of the entry.
        """
        self._composition = Composition(composition)
        self._energy = energy

    @property
    def is_element(self) -> bool:
        """
        :return: Whether composition of entry is an element.
        """
        # NOTE the private _composition is used on purpose: GrandPDEntry
        # has an edge case for compounds where chempots are given for
        # all bar one element type
        return self._composition.is_element

    @property
    def composition(self) -> Composition:
        """
        :return: the composition of the entry.
        """
        return self._composition

    @property
    @abstractmethod
    def energy(self) -> float:
        """
        :return: the energy of the entry.
        """

    @property
    def energy_per_atom(self) -> float:
        """
        :return: the energy per atom of the entry.
        """
        atom_count = self.composition.num_atoms
        return self.energy / atom_count

    def __repr__(self):
        template = "{} : {} with energy = {:.4f}"
        return template.format(self.__class__.__name__, self.composition,
                               self.energy)

    def __str__(self):
        return self.__repr__()

    def normalize(self,
                  mode: str = "formula_unit",
                  inplace: bool = True) -> Optional["Entry"]:
        """
        Scale the entry's composition and energy by a common factor.

        Args:
            mode: "formula_unit" (default) scales down to
                composition.reduced_formula; "atom" scales so that the
                composition amounts sum to 1.
            inplace: when True (default) this object is modified and None is
                returned; when False a deep copy is normalized and returned,
                leaving this object untouched.
        """
        if not inplace:
            # Work on a private copy and reuse the in-place code path.
            clone = copy.deepcopy(self)
            clone.normalize(mode, inplace=True)
            return clone

        scale = self._normalization_factor(mode)
        self._composition /= scale
        self._energy /= scale
        return None

    def _normalization_factor(self, mode: str = "formula_unit") -> float:
        """
        :return: the divisor applied to composition and energy for ``mode``.
        """
        # NOTE the public composition (not _composition) is read here so that
        # subclasses overriding it (GrandPotPDEntry, TransformedPDEntry)
        # get the expected behaviour downstream
        if mode == "atom":
            return self.composition.num_atoms
        if mode == "formula_unit":
            return self.composition.get_reduced_composition_and_factor()[1]
        raise ValueError(
            "`{}` is not an allowed option for normalization".format(mode))

    def as_dict(self) -> dict:
        """
        :return: MSONable dict.
        """
        cls = self.__class__
        return {
            "@module": cls.__module__,
            "@class": cls.__name__,
            "energy": self._energy,
            "composition": self._composition.as_dict(),
        }

    def __eq__(self, other):
        # NOTE Scaled duplicates i.e. physically equivalent materials
        # are not equal unless normalized separately
        if self is other:
            return True
        return isinstance(other, self.__class__) and self._is_dict_eq(other)

    def _is_dict_eq(self, other):
        """
        Compare the as_dict() output of both entries key by key, delegating
        each value comparison to _is_robust_eq. Only the keys of this entry's
        dict are inspected.
        """
        mine = self.as_dict()
        theirs = other.as_dict()

        # Bail out on the first mismatch.
        for key, value in mine.items():
            if not _is_robust_eq(theirs[key], value):
                return False
        return True

    def __hash__(self):
        # NOTE truncate _energy to 8 dp to ensure same robustness
        # as np.allclose
        fingerprint = (f"{self.__class__.__name__}"
                       f"{self._composition.formula}"
                       f"{self._energy:.8f}")
        return hash(fingerprint)
def compute_corrections(self, exp_entries: list, calc_entries: dict) -> dict:
    """
    Computes the corrections and fills in correction, corrections_std_error,
    and corrections_dict.

    Side effects: the ``correction`` of every entry in ``calc_entries`` is
    reset to 0, and names, diffs, coeff_mat, exp_uncer, pcov, corrections and
    corrections_std_error are (re)assigned on ``self``.

    Args:
        exp_entries: list of dictionary objects with the following keys/values:
            {"formula": chemical formula, "exp energy": formation energy in
            eV/formula unit, "uncertainty": uncertainty in formation energy}
        calc_entries: dictionary of computed entries, of the form
            {chemical formula: ComputedEntry}

    Returns:
        self.corrections_dict, mapping each species to a tuple
        (correction rounded to 3 dp, standard error rounded to 4 dp), plus an
        "ozonide" entry fixed at (0, 0).

    Raises:
        ValueError: calc_compounds is missing an elemental entry, a compound
            lacks "e_above_hull" data while a stability threshold is set, or a
            species cannot be looked up in a composition.
    """
    self.exp_compounds = exp_entries
    self.calc_compounds = calc_entries

    self.names: List[str] = []
    self.diffs: List[float] = []
    self.coeff_mat: List[List[float]] = []
    self.exp_uncer: List[float] = []

    # The three oxygen-based species share the same logic; each one only
    # differs in which list on self collects the matching compound names.
    oxide_name_lists = {
        "oxide": "oxides",
        "peroxide": "peroxides",
        "superoxide": "superoxides",
    }

    # remove any corrections in calc_compounds
    for entry in self.calc_compounds.values():
        entry.correction = 0

    for cmpd_info in self.exp_compounds:
        # to get consistent element ordering in formula
        name = Composition(cmpd_info["formula"]).reduced_formula

        allow = True

        compound = self.calc_compounds.get(name)
        if not compound:
            warnings.warn(
                f"Compound {name} is not found in provided computed entries "
                "and is excluded from the fit")
            continue

        # filter out compounds with large uncertainties
        relative_uncertainty = abs(cmpd_info["uncertainty"] /
                                   cmpd_info["exp energy"])
        if relative_uncertainty > self.max_error:
            allow = False
            # relative_uncertainty is a fraction; format it as a real
            # percentage so the number matches the "%" sign in the message.
            warnings.warn(
                f"Compound {name} is excluded from the fit due to high "
                f"experimental uncertainty ({relative_uncertainty:.1%})")

        # filter out compounds containing certain polyanions
        for anion in self.exclude_polyanions:
            if anion in name or anion in cmpd_info["formula"]:
                allow = False
                warnings.warn(
                    f"Compound {name} contains the polyanion {anion} and is "
                    "excluded from the fit")
                break

        # filter out compounds that are unstable
        # NOTE(review): only a float is treated as a threshold here; an int
        # such as 0 skips this filter entirely. Confirm that is intended.
        if isinstance(self.allow_unstable, float):
            try:
                eah = compound.data["e_above_hull"]
            except KeyError as err:
                raise ValueError("Missing e above hull data") from err
            if eah > self.allow_unstable:
                allow = False
                warnings.warn(
                    f"Compound {name} is unstable and excluded from the fit "
                    f"(e_above_hull = {eah})")

        if not allow:
            continue

        comp = Composition(name)
        elems = list(comp.as_dict())

        # The elemental entries are the reactants of the formation reaction.
        reactants = []
        for elem in elems:
            try:
                elem_name = Composition(elem).reduced_formula
                reactants.append(self.calc_compounds[elem_name])
            except KeyError as err:
                raise ValueError("Computed entries missing " + elem) from err

        rxn = ComputedReaction(reactants, [compound])
        rxn.normalize_to(comp)
        energy = rxn.calculated_reaction_energy

        # One coefficient per fitted species: the amount of the relevant
        # element in the compound, or 0 when the species does not apply.
        coeff = []
        for specie in self.species:
            if specie in oxide_name_lists:
                if compound.data["oxide_type"] == specie:
                    coeff.append(comp["O"])
                    getattr(self, oxide_name_lists[specie]).append(name)
                else:
                    coeff.append(0)
            elif specie == "S":
                if Element("S") in comp:
                    # Prefer the stored sulfide type, then one derived from
                    # the structure, and default to "sulfide".
                    sf_type = "sulfide"
                    if compound.data.get("sulfide_type"):
                        sf_type = compound.data["sulfide_type"]
                    elif hasattr(compound, "structure"):
                        sf_type = sulfide_type(compound.structure)
                    if sf_type == "sulfide":
                        coeff.append(comp["S"])
                        self.sulfides.append(name)
                    else:
                        coeff.append(0)
                else:
                    coeff.append(0)
            else:
                try:
                    coeff.append(comp[specie])
                except ValueError as err:
                    raise ValueError(
                        f"We can't detect this specie: {specie}") from err

        # Everything handed to the fit is expressed per atom.
        num_atoms = comp.num_atoms
        self.names.append(name)
        self.diffs.append((cmpd_info["exp energy"] - energy) / num_atoms)
        self.coeff_mat.append([i / num_atoms for i in coeff])
        self.exp_uncer.append((cmpd_info["uncertainty"]) / num_atoms)

    # for any exp entries with no uncertainty value, assign average uncertainty value
    sigma = np.array(self.exp_uncer)
    sigma[sigma == 0] = np.nan

    with warnings.catch_warnings():
        # numpy raises warning if the entire array is nan values
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean_uncer = np.nanmean(sigma)

    sigma = np.where(np.isnan(sigma), mean_uncer, sigma)

    if np.isnan(mean_uncer):
        # no uncertainty values for any compounds, don't try to weight
        popt, self.pcov = curve_fit(_func,
                                    self.coeff_mat,
                                    self.diffs,
                                    p0=np.ones(len(self.species)))
    else:
        popt, self.pcov = curve_fit(
            _func,
            self.coeff_mat,
            self.diffs,
            p0=np.ones(len(self.species)),
            sigma=sigma,
            absolute_sigma=True,
        )

    self.corrections = popt.tolist()
    self.corrections_std_error = np.sqrt(np.diag(self.pcov)).tolist()
    for specie, correction, std_error in zip(self.species, self.corrections,
                                             self.corrections_std_error):
        self.corrections_dict[specie] = (
            round(correction, 3),
            round(std_error, 4),
        )

    # set ozonide correction to 0 so that this species does not receive a correction
    # while other oxide types do
    self.corrections_dict["ozonide"] = (0, 0)

    return self.corrections_dict
class ComputedEntry(PDEntry, MSONable):
    """
    Lightweight container for the key results of a calculation.

    It extends PDEntry so that it can be used for phase diagram generation,
    and adds a correction term plus free-form ``parameters`` and ``data``
    dicts on top of what a plain PDEntry carries.
    """

    def __init__(self,
                 composition,
                 energy,
                 correction=0.0,
                 parameters=None,
                 data=None,
                 entry_id=None,
                 attribute=None):
        """
        Initializes a ComputedEntry.

        Args:
            composition (Composition): Composition of the entry. The value is
                passed through ``Composition(...)``, so a {symbol: amt} dict,
                a formula string, etc. are accepted as well.
            energy (float): Energy of the entry, stored as the uncorrected
                energy. Usually the final calculated energy from VASP or
                other electronic structure codes.
            correction (float): A correction added on top of the energy.
                Defaults to 0.0.
            parameters (dict): Optional dict of parameters associated with
                the entry. Falsy values are replaced by an empty dict.
            data (dict): Optional dict of any additional data associated with
                the entry. Falsy values are replaced by an empty dict.
            entry_id (obj): An optional id to uniquely identify the entry.
            attribute: Optional attribute of the entry, e.g. a label used for
                further analysis and plotting. It can be anything but must be
                MSONable.
        """
        self.uncorrected_energy = energy
        self.composition = Composition(composition)
        self.correction = correction
        self.parameters = parameters or {}
        self.data = data or {}
        self.entry_id = entry_id
        self.name = self.composition.reduced_formula
        self.attribute = attribute

    @property
    def energy(self):
        """
        Returns the *corrected* energy of the entry, i.e. the uncorrected
        energy plus the correction.
        """
        return self.uncorrected_energy + self.correction

    def __repr__(self):
        lines = [
            "ComputedEntry {}".format(self.composition.formula),
            "Energy = {:.4f}".format(self.uncorrected_energy),
            "Correction = {:.4f}".format(self.correction),
            "Parameters:",
        ]
        lines.extend("{} = {}".format(key, value)
                     for key, value in self.parameters.items())
        lines.append("Data:")
        lines.extend("{} = {}".format(key, value)
                     for key, value in self.data.items())
        return "\n".join(lines)

    def __str__(self):
        return self.__repr__()

    @classmethod
    def from_dict(cls, d):
        """
        Rebuilds a ComputedEntry from a dict such as the one produced by
        as_dict(). "composition", "energy" and "correction" are required;
        the remaining keys are optional.
        """
        decoder = MontyDecoder()
        # Required keys are read first so a missing one fails before any
        # decoding work is attempted.
        composition = d["composition"]
        energy = d["energy"]
        correction = d["correction"]
        parameters = decoder.process_decoded(d.get("parameters", {}))
        data = decoder.process_decoded(d.get("data", {}))
        return cls(composition,
                   energy,
                   correction,
                   parameters,
                   data,
                   entry_id=d.get("entry_id", None),
                   attribute=d.get("attribute"))

    def as_dict(self):
        """
        Returns a JSON-compatible dict describing the entry. ``parameters``
        and ``data`` are round-tripped through MontyEncoder. Note that
        "energy" holds the uncorrected energy.
        """
        cls = self.__class__
        out = {"@module": cls.__module__, "@class": cls.__name__}
        out["energy"] = self.uncorrected_energy
        out["composition"] = self.composition.as_dict()
        out["correction"] = self.correction
        out["parameters"] = json.loads(
            json.dumps(self.parameters, cls=MontyEncoder))
        out["data"] = json.loads(json.dumps(self.data, cls=MontyEncoder))
        out["entry_id"] = self.entry_id
        out["attribute"] = self.attribute
        return out
class Entry(MSONable):
    """
    A lightweight Entry object containing key computed data for storage
    purposes.
    """

    def __init__(self,
                 composition,
                 calculator,
                 inputs,
                 data,
                 entry_id=None,
                 attribute=None,
                 tag=None):
        """
        Initializes an Entry.

        Args:
            composition (Composition): Composition of the entry. For
                flexibility, this can take the form of all the typical input
                taken by a Composition, including a {symbol: amt} dict,
                a string formula, and others.
            calculator (str): Name of the code that produced the entry.
                ``load`` compares it case-insensitively against "vasp".
            inputs (dict): A dict of parameters associated with the entry.
            data (dict): A dict of any additional data associated with the
                entry.
            entry_id (obj): An optional id to uniquely identify the entry.
                Any falsy value is stored as None.
            attribute: Optional attribute of the entry. This can be used to
                specify that the entry is a newly found compound, or to
                specify a particular label for the entry, or else ... Used
                for further analysis and plotting purposes. An attribute can
                be anything but must be MSONable.
            tag: Optional tag stored on the entry as-is.
        """
        self.composition = Composition(composition)
        self.calculator = calculator
        self.inputs = inputs
        self.data = data
        self.entry_id = entry_id if entry_id else None
        self.name = self.composition.reduced_formula
        self.attribute = attribute
        self.tag = tag

    @property
    def number_element(self):
        """
        Returns ``len(self.composition)`` (presumably the number of distinct
        elements; confirm against Composition.__len__).
        """
        return len(self.composition)

    def __repr__(self):
        output = [
            "Entry {} - {}".format(self.entry_id, self.composition.formula),
            "calculator: {}".format(self.calculator)
        ]
        return "\n".join(output)

    def __str__(self):
        return self.__repr__()

    @classmethod
    def from_dict(cls, d):
        """
        Rebuilds an Entry from a dict such as the one produced by as_dict().
        "composition" and "calculator" are required; every other key is
        optional. Values of "inputs" and "data" are decoded one by one with
        MontyDecoder.
        """
        dec = MontyDecoder()
        return cls(d["composition"],
                   d["calculator"],
                   inputs={
                       k: dec.process_decoded(v)
                       for k, v in d.get("inputs", {}).items()
                   },
                   data={
                       k: dec.process_decoded(v)
                       for k, v in d.get("data", {}).items()
                   },
                   entry_id=d.get("entry_id", None),
                   attribute=d.get("attribute"),
                   tag=d.get("tag"))

    @classmethod
    def load(cls, filename, Cls=None):
        """
        Loads an Entry from a JSON file.

        Args:
            filename: Path of the JSON file to read.
            Cls: Class exposing ``from_dict`` that is used to decode the
                "inputs" section when the calculator is not VASP. Required in
                that case.

        Returns:
            The Entry described by the file.

        Raises:
            RuntimeError: the calculator is not VASP and no ``Cls`` was given.
        """
        with open(filename, 'r') as f:
            jc = json.load(f)

        composition = jc['composition']
        calculator = jc['calculator']

        if calculator.lower() == 'vasp':
            try:
                inputs = VaspInput.from_dict(jc['inputs']).as_dict()
            except Exception:
                # Best effort: keep the raw inputs when they cannot be turned
                # into a VaspInput. Only Exception is caught so that
                # KeyboardInterrupt / SystemExit still propagate.
                inputs = jc['inputs']
                warnings.warn("""Inproperly configure of POTCAR ! Returned instance cannot be used as input for from_dict() method """)
        elif Cls:
            inputs = Cls.from_dict(jc['inputs']).as_dict()
        else:
            raise RuntimeError("inputs decoder must be given")

        data = LabeledSystem.from_dict(jc['data']).as_dict()

        # These keys are optional in from_dict() and in the constructor, so
        # files that omit them must load as well.
        return cls(composition,
                   calculator,
                   inputs,
                   data,
                   entry_id=jc.get('entry_id'),
                   attribute=jc.get('attribute'),
                   tag=jc.get('tag'))

    def as_dict(self):
        """
        Returns a JSON-compatible dict describing the entry. ``inputs`` and
        ``data`` are round-tripped through MontyEncoder.
        """
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "composition": self.composition.as_dict(),
            "calculator": self.calculator,
            "inputs": json.loads(json.dumps(self.inputs, cls=MontyEncoder)),
            "data": json.loads(json.dumps(self.data, cls=MontyEncoder)),
            "entry_id": self.entry_id,
            "attribute": self.attribute,
            "tag": self.tag,
        }