def are_equal(self, sp1, sp2):
    """
    Compare two species mappings by element and amount only.

    Both arguments are wrapped in a Composition and reduced to their
    element -> amount dictionaries before comparison, i.e., oxidation
    state is not considered.

    Args:
        sp1: First species. A dict of {specie/element: amt} as per the
            definition in Site and PeriodicSite.
        sp2: Second species. A dict of {specie/element: amt} as per the
            definition in Site and PeriodicSite.

    Returns:
        Boolean indicating whether the two element:amount dictionaries
        are exactly equal.
    """
    first, second = Composition(sp1), Composition(sp2)
    first_amounts = first.get_el_amt_dict()
    second_amounts = second.get_el_amt_dict()
    return first_amounts == second_amounts
def _generate_axis_ref(compounds):
    """
    Here we do axis decomposition for a chemical formula.
    Not necessary for complexed systems.

    Each compound is decomposed into charged species. A specie string is the
    element symbol, followed by the absolute value of the oxidation state
    truncated to an integer, followed by '+' (state >= 0) or '-'.
    For every compound a 'marker specie' is then chosen: the first of its
    species that does not occur in any other compound of the list.

    Inputs:
        compounds: a list of compound axes in string form:
            ['LiCoO2', 'CoO2']
    Outputs:
        compSpecieNums: a dict recording how many of each specie a compound
            has in its formula: {'CoO2': {'Co4+': 1, 'O2-': 2}}
        compUniqSpecies: a dict mapping a compound to its 'marker specie':
            {'LiCoO2': 'Co3+', 'CoO2': 'Co4+'}
        uniqSpecieComps: reversed dict of compUniqSpecies.
    Raises:
        ValueError: if no oxidation-state guess can be obtained for a compound.
        SystemExit: (status 1, after printing a message) if a compound has no
            specie that is unique to it.
    """
    ###Preprocessing###
    compSpecieNums = {}
    # Representative ("marker") specie of each compound; it makes computing a
    # composition from a site enumeration easier.
    compUniqSpecies = {}

    for compStr in compounds:
        comp = Composition(compStr)
        compSpecieNums[compStr] = {}
        try:
            compChgDict = comp.oxi_state_guesses()[0]
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate instead of turning into a ValueError.
            raise ValueError(
                'Cannot use compound with non-integer valence as base compound!'
            )
        compNumDict = comp.get_el_amt_dict()
        for specie in compChgDict:
            charge = compChgDict[specie]
            # NOTE(review): int() silently truncates a fractional oxidation
            # state, although the error message above speaks of rejecting
            # non-integer valences -- confirm whether such guesses can occur
            # here and should be rejected.
            specieStr = specie + str(abs(int(charge))) + (
                '+' if charge >= 0 else '-')
            compSpecieNums[compStr][specieStr] = compNumDict[specie]

    for compStr in compounds:
        for specie in compSpecieNums[compStr]:
            # A specie is a valid marker only if no *other* compound has it.
            sharedWithOther = any(
                otherCompStr != compStr
                and specie in compSpecieNums[otherCompStr]
                for otherCompStr in compounds)
            if not sharedWithOther:
                compUniqSpecies[compStr] = specie
                break

    for compound in compounds:
        if compound not in compUniqSpecies:
            print('Can not generate axis. Specified reference compound {} does not have a unique specie. Exiting!'
                  .format(compound))
            # Exit with a non-zero status: this is an error path, and a bare
            # sys.exit() would report success to the calling process.
            sys.exit(1)

    uniqSpecieComps = {val: key for key, val in compUniqSpecies.items()}
    return compSpecieNums, compUniqSpecies, uniqSpecieComps
def _get_poly_formula(
    self,
    geometry: Dict[str, Any],
    nn_sites: List[Dict[str, Any]],
    nnn_sites: List[Dict[str, Any]],
) -> Optional[str]:
    """Build the polyhedral formula from a site's nearest neighbors.

    The formula concatenates the nearest-neighbor elements, in sorted order,
    each followed by its formatted amount. For example, 3 I atoms, 2 Br atoms
    and 1 Cl atom as nearest neighbors give "I3Br2Cl".

    A formula is only produced when the site's own geometry type and the
    geometry type of at least one next nearest neighbor are both in
    :data:`robocrys.util.connected_geometries`; otherwise ``None`` is
    returned.

    Args:
        geometry: The site geometry as produced by
            :meth:`SiteAnalyzer.get_site_geometry`.
        nn_sites: The nearest neighbor sites as produced by
            :meth:`SiteAnalyzer.get_nearest_neighbors`.
        nnn_sites: The next nearest neighbor sites as produced by
            :meth:`SiteAnalyzer.get_next_nearest_neighbors`.

    Returns:
        The polyhedral formula, or ``None`` if the connectivity condition
        described above is not met.
    """

    def sort_key(symbol):
        # NOTE(review): with ``use_iupac_formula`` set the key is ``.X`` and
        # otherwise ``.iupac_ordering``, which looks inverted relative to the
        # flag's name -- confirm the intended ordering before changing it.
        species = get_el_sp(symbol)
        primary = species.X if self.use_iupac_formula else species.iupac_ordering
        return [primary, symbol]

    neighbor_geometries = [site["geometry"] for site in nnn_sites]

    # Guard 1: the site itself must have a connected geometry.
    if geometry["type"] not in connected_geometries:
        return None

    # Guard 2: at least one next nearest neighbor must have one as well.
    neighbor_is_connected = [
        neighbor["type"] in connected_geometries
        for neighbor in neighbor_geometries
    ]
    if not any(neighbor_is_connected):
        return None

    symbols = [get_el(site["element"]) for site in nn_sites]
    amounts = Composition("".join(symbols)).get_el_amt_dict()

    return "".join(
        symbol + formula_double_format(amounts[symbol])
        for symbol in sorted(amounts.keys(), key=sort_key)
    )
def predict_k_g_list_of_entries(entries):
    """
    Predict bulk (K) and shear (G) moduli from a list of entries in the
    same format as retrieved from the Materials Project API.

    Each entry is read through the keys "pretty_formula", "material_id",
    "volume", "nsites", "energy_per_atom" and "is_hubbard".

    :param entries: iterable of entry mappings
    :return: (matid_list, k_list, g_list, caveats_list). Materials with a
        prediction come first; materials for which an atom-in-a-box energy
        was unavailable follow, with ``None`` moduli and the CAVEAT_AIAB
        caveat. If nothing at all was collected the result is
        (None, None, None, None).
    """
    # Per-material descriptors; every list grows in lockstep with material_ids.
    log_volume_per_atom = []
    energy_minus_aiab = []
    row_mean_p1 = []
    row_mean_n3 = []
    x_mean_p4 = []
    x_mean_n4 = []

    material_ids = []
    bulk_moduli = []
    shear_moduli = []
    caveats = []
    missing_aiab_ids = []

    for entry in entries:
        fractions = []
        aiab_energies = []
        rows = []
        x_values = []
        has_f_block = False
        aiab_missing = False

        # Collect per-element data for this material.
        composition = Composition(str(entry["pretty_formula"]))
        for symbol, _amount in composition.get_el_amt_dict().items():
            element = Element(symbol)
            fractions.append(composition.get_atomic_fraction(element))
            energy = get_element_aiab_energy(symbol)  # aiab = atom-in-a-box
            if energy is None:
                aiab_missing = True
                break
            aiab_energies.append(energy)
            if element.block == 'f':
                has_f_block = True
            rows.append(element.row)
            x_values.append(element.X)

        # Without an atom-in-a-box energy the material is only reported,
        # not predicted.
        if aiab_missing:
            missing_aiab_ids.append(str(entry["material_id"]))
            continue

        # Space-separated caveats for this material.
        caveat_text = ''
        if bool(entry["is_hubbard"]):
            caveat_text += CAVEAT_HUBBARD
        if has_f_block:
            if caveat_text:
                caveat_text += " "
            caveat_text += CAVEAT_F_BLOCK

        # Atomic-fraction-weighted average of the atom-in-a-box energies.
        mean_aiab_energy = np.average(aiab_energies, weights=fractions)

        material_id = str(entry["material_id"])
        print(material_id)

        log_volume_per_atom.append(
            math.log10(float(entry["volume"]) / float(entry["nsites"])))
        energy_minus_aiab.append(
            float(entry["energy_per_atom"]) - mean_aiab_energy)
        row_mean_p1.append(holder_mean(rows, 1.0, weights=fractions))
        row_mean_n3.append(holder_mean(rows, -3.0, weights=fractions))
        x_mean_p4.append(holder_mean(x_values, 4.0, weights=fractions))
        x_mean_n4.append(holder_mean(x_values, -4.0, weights=fractions))
        material_ids.append(material_id)
        caveats.append(caveat_text)

    count = len(material_ids)
    if count > 0:
        # Sanity check: every descriptor list must have one value per material.
        descriptor_lists = (log_volume_per_atom, energy_minus_aiab,
                            row_mean_p1, row_mean_n3, x_mean_p4, x_mean_n4)
        if any(len(values) != count for values in descriptor_lists):
            return (None, None, None, None)

        # One row per descriptor, one column per material.
        k_descriptors = np.ascontiguousarray(
            [log_volume_per_atom, row_mean_p1, energy_minus_aiab, x_mean_n4],
            dtype=float)
        g_descriptors = np.ascontiguousarray(
            [energy_minus_aiab, log_volume_per_atom, row_mean_n3, x_mean_p4],
            dtype=float)

        # Output buffers handed to the predictor.
        k_predictions = np.empty(count)
        g_predictions = np.empty(count)

        module_dir = os.path.dirname(__file__)
        k_filename = os.path.join(module_dir, DATAFILE_K)
        g_filename = os.path.join(os.path.dirname(__file__), DATAFILE_G)
        gbml.core.predict(k_filename, count, k_descriptors, k_predictions)
        gbml.core.predict(g_filename, count, g_descriptors, g_predictions)

        # The reported moduli are 10 raised to the predicted values.
        bulk_moduli = np.power(10.0, k_predictions).tolist()
        shear_moduli = np.power(10.0, g_predictions).tolist()

    # Materials without an atom-in-a-box energy are appended last.
    for problem_id in missing_aiab_ids:
        material_ids.append(problem_id)
        bulk_moduli.append(None)
        shear_moduli.append(None)
        caveats.append(CAVEAT_AIAB)

    if not material_ids:
        return (None, None, None, None)
    return (material_ids, bulk_moduli, shear_moduli, caveats)
def predict_k_g_list_of_entries(entries):
    """
    Predict bulk (K) and shear (G) moduli from a list of entries in the
    same format as retrieved from the Materials Project API.

    For every entry the keys "pretty_formula", "material_id", "volume",
    "nsites", "energy_per_atom" and "is_hubbard" are read.

    Returns a tuple (matid_list, k_list, g_list, caveats_list). Entries that
    could be predicted are listed first; entries lacking an atom-in-a-box
    (aiab) energy for one of their elements are listed afterwards with
    ``None`` for both moduli and CAVEAT_AIAB as caveat. When no entry ends up
    in the result, (None, None, None, None) is returned instead.
    """
    matid_list = []
    caveats_list = []
    # One tuple per predicted material:
    # (lvpa, cepa, rowH1A, rowHn3A, xH4A, xHn4A)
    descriptor_rows = []
    aiab_problem_ids = []

    for entry in entries:
        weights, energies, rows, xs = [], [], [], []
        saw_f_block = False

        composition = Composition(str(entry["pretty_formula"]))
        for element_key, _ in composition.get_el_amt_dict().items():
            element = Element(element_key)
            weights.append(composition.get_atomic_fraction(element))
            aiab_energy = get_element_aiab_energy(element_key)
            if aiab_energy is None:
                # No prediction possible for this material; remember it and
                # move on to the next entry.
                aiab_problem_ids.append(str(entry["material_id"]))
                break
            energies.append(aiab_energy)
            if element.block == 'f':
                saw_f_block = True
            rows.append(element.row)
            xs.append(element.X)
        else:
            # Only reached when every element had an aiab energy.
            caveats_str = ''
            if bool(entry["is_hubbard"]):
                caveats_str += CAVEAT_HUBBARD
            if saw_f_block:
                if len(caveats_str) > 0:
                    caveats_str += " "
                caveats_str += CAVEAT_F_BLOCK

            # Weighted average of the atom-in-a-box energies.
            ewa = np.average(energies, weights=weights)

            matid = str(entry["material_id"])
            print(matid)

            descriptor_rows.append((
                math.log10(float(entry["volume"]) / float(entry["nsites"])),
                float(entry["energy_per_atom"]) - ewa,
                holder_mean(rows, 1.0, weights=weights),
                holder_mean(rows, -3.0, weights=weights),
                holder_mean(xs, 4.0, weights=weights),
                holder_mean(xs, -4.0, weights=weights),
            ))
            matid_list.append(matid)
            caveats_list.append(caveats_str)

    k_list = []
    g_list = []
    num_predictions = len(matid_list)
    if num_predictions > 0:
        # Transpose the per-material rows into one list per descriptor; the
        # resulting lists all have num_predictions items by construction.
        (lvpa_list, cepa_list, rowH1A_list,
         rowHn3A_list, xH4A_list, xHn4A_list) = [
            list(column) for column in zip(*descriptor_rows)]

        k_descriptors = np.ascontiguousarray(
            [lvpa_list, rowH1A_list, cepa_list, xHn4A_list], dtype=float)
        g_descriptors = np.ascontiguousarray(
            [cepa_list, lvpa_list, rowHn3A_list, xH4A_list], dtype=float)

        # Buffers passed to the predictor.
        k_predictions = np.empty(num_predictions)
        g_predictions = np.empty(num_predictions)

        k_filename = os.path.join(os.path.dirname(__file__), DATAFILE_K)
        g_filename = os.path.join(os.path.dirname(__file__), DATAFILE_G)
        gbml.core.predict(k_filename, num_predictions,
                          k_descriptors, k_predictions)
        gbml.core.predict(g_filename, num_predictions,
                          g_descriptors, g_predictions)

        # Reported moduli are 10 ** prediction.
        k_list = np.power(10.0, k_predictions).tolist()
        g_list = np.power(10.0, g_predictions).tolist()

    # Append aiab problem cases after the predicted materials.
    for problem_id in aiab_problem_ids:
        matid_list.append(problem_id)
        k_list.append(None)
        g_list.append(None)
        caveats_list.append(CAVEAT_AIAB)

    if matid_list:
        return (matid_list, k_list, g_list, caveats_list)
    return (None, None, None, None)
def predict_k_g_list(material_id_list, api_key=API_KEY, query_engine=None):
    """
    Predict bulk (K) and shear (G) moduli for a list of materials.

    :param material_id_list: list of material-ID strings
    :param api_key: The API key used by pymatgen.matproj.rest.MPRester to connect to Materials Project
    :param query_engine: (Optional) QueryEngine object used to query materials instead of MPRester
    :return: (matid_list, predicted_k_list, predicted_g_list, caveats_list)
        Note that len(matid_list) may be less than len(material_id_list), if any requested
        material-IDs are not found. Materials for which an atom-in-a-box energy is
        unavailable are listed last, with ``None`` moduli and the CAVEAT_AIAB caveat.
        (None, None, None, None) is returned when material_id_list is not a
        non-empty list, or when no material ends up in the result.
    """
    # Check the type before the length so that input without a length
    # (e.g. None) yields the sentinel tuple instead of a TypeError.
    if not isinstance(material_id_list, list) or len(material_id_list) == 0:
        return (None, None, None, None)  # material_id_list not properly specified

    lvpa_list = []
    cepa_list = []
    rowH1A_list = []
    rowHn3A_list = []
    xH4A_list = []
    xHn4A_list = []
    matid_list = []
    k_list = []
    g_list = []
    caveats_list = []
    aiab_problem_list = []

    mpr = _get_mp_query(api_key, query_engine)
    try:
        for entry in mpr.query(
                criteria={"task_id": {"$in": material_id_list}},
                properties=[
                    "material_id", "pretty_formula", "nsites", "volume",
                    "energy_per_atom", "is_hubbard"
                ]):
            caveats_str = ''
            aiab_flag = False
            f_block_flag = False
            weight_list = []
            energy_list = []
            row_list = []
            x_list = []

            # Construct per-element lists for this material
            composition = Composition(str(entry["pretty_formula"]))
            # Only the element keys are needed; iterating the dict directly
            # works on both Python 2 and Python 3 (unlike .iteritems()).
            for element_key in composition.get_el_amt_dict():
                element = Element(element_key)
                weight_list.append(composition.get_atomic_fraction(element))
                aiab_energy = get_element_aiab_energy(
                    element_key)  # aiab = atom-in-a-box
                if aiab_energy is None:
                    aiab_flag = True
                    break
                energy_list.append(aiab_energy)
                if element.block == 'f':
                    f_block_flag = True
                row_list.append(element.row)
                x_list.append(element.X)

            # On error, add material to aiab_problem_list and continue with next material
            if aiab_flag:
                aiab_problem_list.append(str(entry["material_id"]))
                continue

            # Check caveats (space-separated when more than one applies)
            if bool(entry["is_hubbard"]):
                if len(caveats_str) > 0:
                    caveats_str += " "
                caveats_str += CAVEAT_HUBBARD
            if f_block_flag:
                if len(caveats_str) > 0:
                    caveats_str += " "
                caveats_str += CAVEAT_F_BLOCK

            # Calculate intermediate weighted averages (WA) for this material
            ewa = np.average(energy_list,
                             weights=weight_list)  # atom-in-a-box energy WA

            # Function-call form: valid on both Python 2 and Python 3.
            print(str(entry["material_id"]))

            # Append descriptors for this material to descriptor lists
            lvpa_list.append(
                math.log10(float(entry["volume"]) / float(entry["nsites"])))
            cepa_list.append(float(entry["energy_per_atom"]) - ewa)
            rowH1A_list.append(holder_mean(row_list, 1.0, weights=weight_list))
            rowHn3A_list.append(holder_mean(row_list, -3.0, weights=weight_list))
            xH4A_list.append(holder_mean(x_list, 4.0, weights=weight_list))
            xHn4A_list.append(holder_mean(x_list, -4.0, weights=weight_list))
            matid_list.append(str(entry["material_id"]))
            caveats_list.append(caveats_str)
    finally:
        # Release the HTTP session even if querying or descriptor
        # construction failed; other query objects are left untouched.
        if isinstance(mpr, MPRester):
            mpr.session.close()

    # Check that at least one valid material was provided
    num_predictions = len(matid_list)
    if num_predictions > 0:
        # Sanity check: one value per material in every descriptor list
        descriptor_lists = (lvpa_list, cepa_list, rowH1A_list, rowHn3A_list,
                            xH4A_list, xHn4A_list)
        if any(len(values) != num_predictions for values in descriptor_lists):
            return (None, None, None, None)

        # Construct descriptor arrays (one row per descriptor)
        k_descriptors = np.ascontiguousarray(
            [lvpa_list, rowH1A_list, cepa_list, xHn4A_list], dtype=float)
        g_descriptors = np.ascontiguousarray(
            [cepa_list, lvpa_list, rowHn3A_list, xH4A_list], dtype=float)

        # Allocate prediction arrays
        k_predictions = np.empty(num_predictions)
        g_predictions = np.empty(num_predictions)

        # Make predictions
        k_filename = os.path.join(os.path.dirname(__file__), DATAFILE_K)
        g_filename = os.path.join(os.path.dirname(__file__), DATAFILE_G)
        gbml.core.predict(k_filename, num_predictions, k_descriptors,
                          k_predictions)
        gbml.core.predict(g_filename, num_predictions, g_descriptors,
                          g_predictions)

        # Reported moduli are 10 ** prediction
        k_list = np.power(10.0, k_predictions).tolist()
        g_list = np.power(10.0, g_predictions).tolist()

    # Append aiab problem cases
    for problem_id in aiab_problem_list:
        matid_list.append(problem_id)
        k_list.append(None)
        g_list.append(None)
        caveats_list.append(CAVEAT_AIAB)

    if len(matid_list) == 0:
        return (None, None, None, None)
    return (matid_list, k_list, g_list, caveats_list)