def test_from_dict(self):
    """Check Composition.from_dict with plain symbols and with a round trip.

    First builds a composition from element-symbol amounts and compares its
    reduced formula, then verifies that a composition containing species
    with oxidation states is equal to itself after as_dict()/from_dict().
    """
    amounts_by_symbol = {"Fe": 6, "O": 8}
    from_symbols = Composition.from_dict(amounts_by_symbol)
    self.assertEqual(
        from_symbols.reduced_formula,
        "Fe3O4",
        "Creation form sym_amount dictionary failed!",
    )

    # Round trip for species that carry oxidation states.
    original = Composition({"Fe2+": 2, "Fe3+": 4, "O2-": 8})
    round_tripped = Composition.from_dict(original.as_dict())
    self.assertEqual(original, round_tripped)
def test_from_dict(self):
    """Exercise Composition.from_dict on a symbol dict and on an as_dict() dump."""
    # Fe6O8 given as symbol -> amount must reduce to Fe3O4.
    reduced = Composition.from_dict({"Fe": 6, "O": 8}).reduced_formula
    self.assertEqual(
        reduced, "Fe3O4", "Creation form sym_amount dictionary failed!"
    )

    # A composition with charged species must compare equal to the one
    # rebuilt from its own dictionary representation.
    charged = Composition({"Fe2+": 2, "Fe3+": 4, "O2-": 8})
    rebuilt = Composition.from_dict(charged.as_dict())
    self.assertEqual(charged, rebuilt)
def comp_comp(comp1, comp2):
    """Tell whether two compositions match once lithium is ignored.

    Args:
        comp1: Composition-like object exposing ``as_dict()``.
        comp2: Composition-like object exposing ``as_dict()``.

    Returns:
        bool: True when the reduced formulas of both compositions, with any
        'Li' entry removed, are identical.
    """
    li_free = []
    for comp in (comp1, comp2):
        amounts = comp.as_dict()
        # Drop the Li entry when there is one; other keys are untouched.
        amounts.pop('Li', None)
        li_free.append(Composition.from_dict(amounts))

    comp1_p, comp2_p = li_free
    return comp2_p.reduced_formula == comp1_p.reduced_formula
def test_Metallofullerene(self):
    """Parsing a metallofullerene formula (e.g. Y3N@C80) matches the explicit amounts."""
    parsed = Composition("Y3N@C80")
    explicit = Composition.from_dict({"Y": 3, "N": 1, "C": 80})
    self.assertEqual(parsed, explicit)
def _make_title(self, legend):
    """Build the title heading showing the reduced formula of the legend.

    Args:
        legend: Mapping that may hold a "composition" entry. A dict
            composition is converted with ``Composition.from_dict``.

    Returns:
        An ``H1`` component. When ``legend`` is empty or has no composition,
        the heading shows ``self.default_title``. Otherwise it holds the
        formula split into text parts (``html.Span``) and numeric parts
        (``html.Sub``).
    """
    if not legend or (not legend.get("composition", None)):
        return H1(self.default_title, id=self.id("title"))

    composition = legend["composition"]

    if isinstance(composition, dict):
        try:
            composition = Composition.from_dict(composition)

            # strip DummySpecie if present (TODO: should be method in pymatgen)
            composition = Composition({
                el: amt
                for el, amt in composition.items()
                if not isinstance(el, DummySpecie)
            })
            composition = composition.get_reduced_composition_and_factor()[0]

            formula = composition.reduced_formula
            # Split e.g. "Fe2O3" into ["Fe", "2", "O", "3"] so that the
            # numbers can be rendered as subscripts.
            formula_parts = re.findall(r"[^\d_]+|\d+", formula)
            formula_components = [
                html.Sub(part.strip())
                if part.isnumeric()
                else html.Span(part.strip())
                for part in formula_parts
            ]
        except Exception:
            # Best effort: fall back to listing the keys. ``Exception``
            # (rather than a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            formula_components = list(map(str, composition.keys()))
    else:
        # Previously ``formula_components`` was unbound on this path, which
        # raised UnboundLocalError; show the value's string form instead.
        formula_components = [html.Span(str(composition))]

    return H1(formula_components,
              id=self.id("title"),
              style={"display": "inline-block"})
def _make_legend(self, legend):
    """Build the colour legend as a grouped field of static buttons.

    Args:
        legend: Mapping with a "colors" entry (hex colour -> label) and a
            "composition" entry accepted by ``Composition.from_dict``.

    Returns:
        An empty ``html.Div`` when ``legend`` is None or has no colours,
        otherwise a ``Field`` holding one ``Control``-wrapped ``Button`` per
        colour, ordered by where each label first occurs in the reduced
        formula.
    """
    if legend is None or not legend.get("colors", None):
        return html.Div(id=self.id("legend"))

    def get_font_color(hex_code):
        # ensures contrasting font color for background color
        digits = hex_code[1:]
        c = tuple(int(digits[pos : pos + 2], 16) for pos in (0, 2, 4))
        darkness = 1 - (c[0] * 0.299 + c[1] * 0.587 + c[2] * 0.114) / 255
        return "#000000" if darkness < 0.5 else "#ffffff"

    formula = Composition.from_dict(legend["composition"]).reduced_formula

    def position_in_formula(entry):
        # entry is a (color, name) pair; labels absent from the formula
        # get -1 from str.find and therefore sort first.
        return formula.find(entry[1])

    color_entries = list(legend["colors"].items())
    legend_colors = OrderedDict(sorted(color_entries, key=position_in_formula))

    legend_elements = []
    for color, name in legend_colors.items():
        label = html.Span(
            name, className="icon", style={"color": get_font_color(color)}
        )
        legend_elements.append(
            Button(label, kind="static", style={"background-color": color})
        )

    controls = [
        Control(el, style={"margin-right": "0.2rem"}) for el in legend_elements
    ]
    return Field(controls, id=self.id("legend"), grouped=True)
def test_as_dict(self):
    """Compare as_dict() and to_reduced_dict of Fe4O6 with the expected amounts."""
    comp = Composition.from_dict({"Fe": 4, "O": 6})

    # Full (unreduced) amounts.
    full_amounts = comp.as_dict()
    expected_full = {"Fe": 4.0, "O": 6.0}
    for symbol in ("Fe", "O"):
        self.assertEqual(full_amounts[symbol], expected_full[symbol])

    # Reduced amounts.
    expected_reduced = {"Fe": 2.0, "O": 3.0}
    reduced_amounts = comp.to_reduced_dict
    for symbol in ("Fe", "O"):
        self.assertEqual(reduced_amounts[symbol], expected_reduced[symbol])
def test_as_dict(self):
    """Check the Fe/O amounts reported by as_dict() and to_reduced_dict."""
    comp = Composition.from_dict({'Fe': 4, 'O': 6})

    # (actual mapping, expected mapping) pairs, checked in the same order
    # as before: full amounts first, reduced amounts second.
    full = comp.as_dict()
    self.assertEqual(full['Fe'], 4.0)
    self.assertEqual(full['O'], 6.0)

    reduced = comp.to_reduced_dict
    self.assertEqual(reduced['Fe'], 2.0)
    self.assertEqual(reduced['O'], 3.0)
def test_as_dict(self):
    """Check as_dict() amounts and that to_reduced_dict is a dict with reduced amounts."""
    comp = Composition.from_dict({"Fe": 4, "O": 6})

    full_amounts = comp.as_dict()
    expected_full = {"Fe": 4.0, "O": 6.0}
    for symbol in ("Fe", "O"):
        self.assertEqual(full_amounts[symbol], expected_full[symbol])

    expected_reduced = {"Fe": 2.0, "O": 3.0}
    reduced_amounts = comp.to_reduced_dict
    # The reduced representation must be a plain dict (or subclass).
    self.assertIsInstance(reduced_amounts, dict)
    for symbol in ("Fe", "O"):
        self.assertEqual(reduced_amounts[symbol], expected_reduced[symbol])
def _make_title(self, legend):
    """Build the title heading from the legend's composition.

    Args:
        legend: Mapping that may hold a "composition" entry; a dict value is
            converted with ``Composition.from_dict``, any other value is
            used as-is and must expose ``reduced_formula``.

    Returns:
        An ``H1`` component: the default title when no composition is
        available, otherwise the reduced formula with numeric parts wrapped
        in ``html.Sub`` and the remaining parts in ``html.Span``.
    """
    has_composition = bool(legend) and bool(legend.get("composition", None))
    if not has_composition:
        return H1(self.default_title, id=self.id("title"))

    composition = legend["composition"]
    if isinstance(composition, dict):
        composition = Composition.from_dict(composition)

    # Alternate between runs of non-digits and runs of digits.
    formula_components = []
    for part in re.findall(r"[^\d_]+|\d+", composition.reduced_formula):
        if part.isnumeric():
            formula_components.append(html.Sub(part))
        else:
            formula_components.append(html.Span(part))

    return H1(formula_components,
              id=self.id("title"),
              style={"display": "inline-block"})
def formula_query_dict(query_string):
    """Build a Mongo query matching stored formulas equal to the query, ignoring Li.

    The formula is parsed, its Li entry (if any) is removed, and a regex of
    look-aheads (one per remaining element name) pre-filters the distinct
    "formula_charge" values of ``mongo_coll``. Each candidate is then
    compared with ``comp_comp``.

    Args:
        query_string: Formula string accepted by ``Composition``.

    Returns:
        dict: ``{"formula_charge": {"$in": [...]}}`` listing the matching
        stored formulas.
    """
    comp_dict = Composition(query_string).as_dict()
    comp_dict.pop('Li', None)
    query_comp = Composition.from_dict(comp_dict)

    # One positive look-ahead per element name, then match anything.
    lookaheads = [f"(?=.*{element.name})" for element in query_comp.elements]
    query_regex = ''.join(lookaheads + ['.*'])

    cursor = mongo_coll.find({"formula_charge": {"$regex": query_regex}})
    form_list = cursor.distinct("formula_charge")

    result_list = [
        stored for stored in form_list
        if comp_comp(query_comp, Composition(stored))
    ]
    return {"formula_charge": {"$in": result_list}}
def _make_legend(self, legend):
    """Build the colour legend as a row of static, rounded buttons.

    Args:
        legend: Mapping with a "colors" entry (hex colour -> label) and,
            optionally, a "composition" entry accepted by
            ``Composition.from_dict``.

    Returns:
        An empty ``html.Div`` when ``legend`` is empty or has no colours,
        otherwise an ``html.Div`` holding one styled ``html.Span`` per
        colour, ordered by where each label first occurs in the reduced
        formula.
    """
    # Also return the empty legend when "colors" is missing or empty;
    # previously that case raised KeyError further down.
    if not legend or not legend.get("colors", None):
        return html.Div(id=self.id("legend"))

    def get_font_color(hex_code):
        # ensures contrasting font color for background color
        c = tuple(int(hex_code[1:][i:i + 2], 16) for i in (0, 2, 4))
        if 1 - (c[0] * 0.299 + c[1] * 0.587 + c[2] * 0.114) / 255 < 0.5:
            font_color = "#000000"
        else:
            font_color = "#ffffff"
        return font_color

    try:
        formula = Composition.from_dict(
            legend["composition"]).reduced_formula
    except Exception:
        # TODO: fix legend for Dummy Specie compositions
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        formula = "Unknown"

    # Labels that do not occur in the formula get -1 from str.find and
    # therefore sort first.
    legend_colors = OrderedDict(
        sorted(legend["colors"].items(),
               key=lambda x: formula.find(x[1])))

    legend_elements = [
        html.Span(
            html.Span(name,
                      className="icon",
                      style={"color": get_font_color(color)}),
            className="button is-static is-rounded",
            style={"backgroundColor": color},
        ) for color, name in legend_colors.items()
    ]

    return html.Div(legend_elements,
                    id=self.id("legend"),
                    style={"display": "flex"},
                    className="buttons")
def _make_title(self, legend):
    """Build the title heading showing the IUPAC formula of the legend.

    Args:
        legend: Mapping that may hold a "composition" entry. A dict
            composition is converted with ``Composition.from_dict``.

    Returns:
        An ``H1`` component. When ``legend`` is empty or has no composition,
        the heading shows ``self.default_title``. Otherwise it holds the
        formula split into text parts (``html.Span``) and numeric parts
        (``html.Sub``).
    """
    if not legend or (not legend.get("composition", None)):
        return H1(self.default_title, id=self.id("title"))

    composition = legend["composition"]

    if isinstance(composition, dict):
        # TODO: make Composition handle DummySpecie for title
        try:
            composition = Composition.from_dict(composition)
            formula = composition.iupac_formula
            # Split the formula into alternating text and digit runs so
            # that the numbers can be rendered as subscripts.
            formula_parts = re.findall(r"[^\d_]+|\d+", formula)
            formula_components = [
                html.Sub(part) if part.isnumeric() else html.Span(part)
                for part in formula_parts
            ]
        except Exception:
            # Best effort: fall back to the composition keys. ``Exception``
            # (rather than a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            formula_components = list(composition.keys())
    else:
        # Previously ``formula_components`` was unbound on this path, which
        # raised UnboundLocalError; show the value's string form instead.
        formula_components = [html.Span(str(composition))]

    return H1(
        formula_components, id=self.id("title"), style={"display": "inline-block"}
    )
def run(mpfile, include_cifs=True, **kwargs):
    """Fill ``mpfile`` with phase data (and optionally structures) keyed by mp-id.

    Reads four JSON files whose paths are listed under the 'input' entry of
    the first top-level section of ``mpfile.document`` (paths are resolved
    relative to this module's directory), matches every phase entry to its
    hull state and mp-ids, and adds the resulting data to ``mpfile``.

    NOTE: this function uses Python 2 print statements.

    Args:
        mpfile: MPFile-like object; must provide ``document``, ``hdata``,
            ``ids``, ``add_structure`` and ``add_hierarchical_data``.
        include_cifs (bool): when True, structures are attached as well.
        **kwargs: accepted but not used in this function.
    """
    from pymatgen.core.composition import Composition
    from pymatgen.core.structure import Structure

    # The 'input' entry is removed (popped) from the document while reading.
    data_input = mpfile.document[mp_level01_titles[0]].pop('input')
    phase_names = mpfile.hdata.general['phase_names']
    dir_path = os.path.dirname(os.path.realpath(__file__))
    for k in data_input.keys():
        data_input[k] = os.path.join(dir_path, data_input[k])

    with open(data_input['formatted_entries'], "r") as fin:
        mp_contrib_phases = json.loads(fin.read())
    with open(data_input['hull_entries'], "r") as fin:
        hull_states = json.loads(fin.read())
    with open(data_input['mpid_existing'], 'r') as fin:
        mp_dup = json.loads(fin.read())
    with open(data_input['mpid_new'], 'r') as fin:
        mp_cmp = json.loads(fin.read())

    ################################################################################################################
    # add unique structures first (special cases)
    ################################################################################################################

    if include_cifs:
        for hstate in hull_states:
            if 'other' == hstate['phase']:
                c = Composition.from_dict(hstate['c'])
                s = Structure.from_dict(hstate['s'])
                for mpid in mpfile.ids:
                    formula = mpfile.hdata[mpid]['data']['Formula']
                    if c.almost_equals(Composition(formula)):
                        try:
                            mpfile.add_structure(s, identifier=mpid)
                            print formula, 'added to', mpid
                        except Exception as ex:
                            print 'tried to add structure to', mpid, 'but', str(ex)
                        # only the first mp-id with a matching formula is used
                        break

    # "phase": 'postspinel-NaMn2O4', "Formula": 'Na0.5MnO2',
    # "ΔH (eV/mol)": -1.415, "ΔHₕ (eV/mol)": '', "Ground state?": 'Y',

    ################################################################################################################
    # Get mp-ids for all entries based on matching the VASP directory path names
    # Paths are different in the existing and new mp-id dictionary, so processing has to be independent
    ################################################################################################################

    print 'get all mp-ids based on VASP directory paths ...'

    # Append the hull state's path and structure to each phase entry; the
    # length check (< 6) prevents appending a second match.
    # NOTE(review): the indices used below (phase[4] = path, phase[5] =
    # structure, phase[6] = mp-ids) assume every formatted entry starts with
    # exactly four fields and finds a hull match - confirm against the data.
    for framework, fdat in mp_contrib_phases.items():
        for i, phase in enumerate(fdat):
            c = Composition(phase[0])
            for hstate in hull_states:
                if phase_names[framework] == hstate['phase'] and \
                        c.almost_equals(Composition.from_dict(hstate['c'])) and \
                        len(mp_contrib_phases[framework][i]) < 6:
                    mp_contrib_phases[framework][i].append(hstate['path'])
                    mp_contrib_phases[framework][i].append(hstate['s'])

    # Collect the mp-ids of each phase by comparing normalized directory
    # paths; the list of ids is appended to the phase entry.
    for framework, fdat in mp_contrib_phases.items():
        for i, phase in enumerate(fdat):
            match_path = phase[4].replace('all_states/', '')
            mp_ids = []
            for path, ids in mp_dup.items():
                mp_path = path.replace(
                    '/Users/patrick/Downloads/20160710_MPContrib_MnO2_DK/',
                    '').replace('/3.double_relax/CONTCAR', '')
                if match_path == mp_path:
                    mp_ids.extend(ids)
            for path, id_dat in mp_cmp.items():
                mp_path = path.replace(
                    '20160725_MnO2_DK_Cifs/20160710_MPContrib_MnO2_DK-',
                    '').replace('-3.double_relax-CONTCAR.cif',
                                '').replace('-', '/')
                if match_path == mp_path:
                    if 'mp_id' in id_dat.keys():
                        mp_ids.append(id_dat['mp_id'])

            mp_contrib_phases[framework][i].append(mp_ids)

    ################################################################################################################
    # For structures that have mp-ids, add them to the contribution dictionary.
    # For those that don't, run a separate dictionary to keep track of them
    ################################################################################################################

    print 'add structures with mp-ids to contribution ...'

    # phases without any mp-id, keyed by their normalized path
    no_id_dict = {}

    for framework, fdat in mp_contrib_phases.items():
        for phase in fdat:
            d = RecursiveDict()
            d["Phase"] = framework
            d["Formula"] = phase[0]
            # float() is only used as a validity check; any failure leads
            # to the 'N/A' placeholder.
            try:
                float(phase[1])
                d["ΔH"] = clean_value(phase[1], 'eV/mol')
            except:
                d["ΔH"] = 'N/A eV/mol'
            try:
                float(phase[3])
                d["ΔHₕ"] = clean_value(phase[3], 'eV/mol')
            except:
                d["ΔHₕ"] = 'N/A eV/mol'
            d["GS"] = 'Yes' if phase[2] == 'Y' else 'No'
            if len(phase[6]) == 0:
                print 'no id for', d['Formula'], d['Phase']
                no_id_dict[phase[4].replace('all_states/', '')] = d
            for mpid in phase[6]:
                if include_cifs:
                    try:
                        mpfile.add_structure(phase[5], identifier=mpid)
                        print framework, phase[0], 'added to', mpid
                    except ValueError as ex:
                        print 'tried to add structure to', mpid, 'but', str(ex)
                mpfile.add_hierarchical_data(RecursiveDict({'data': d}),
                                             identifier=mpid)
                print 'added', mpid
def get_structure_type(structure, write_poscar_from_cluster=False):
    """
    This is a topology-scaling algorithm used to describe the
    periodicity of bonded clusters in a bulk structure.

    Two sites count as bonded when their distance is below 1.1 times
    the sum of their atomic radii. The cluster bonded to the first
    site is grown until it stops changing and is then classified.

    Args:
        structure (structure): Pymatgen structure object to classify.
        write_poscar_from_cluster (bool): Set to True to write a
            POSCAR from the sites in the cluster. Nothing is written
            for noble-gas structures, because no cluster is built for
            them.

    Returns:
        string. One of 'noble gas', 'molecular', 'conventional',
        'heterogeneous' or 'layered'. Note that this implementation
        never returns 'chain': any cluster that does not grow exactly
        4x in the doubled supercell is reported as 'molecular'.
    """

    def _bond_map(sites):
        """Map every site to the list of sites it is bonded to."""
        bonds = {site: [] for site in sites}
        for i, site_1 in enumerate(sites):
            for site_2 in sites[i + 1:]:
                bond_cutoff = float(
                    Element(site_1.specie).atomic_radius
                    + Element(site_2.specie).atomic_radius) * 1.1
                if site_1.distance(site_2) < bond_cutoff:
                    bonds[site_1].append(site_2)
                    bonds[site_2].append(site_1)
        return bonds

    def _grow_cluster(bonds, seed):
        """Assimilate all atoms bonded to `seed`; stop when the cluster stops growing."""
        cluster = bonds[seed]
        while True:
            size_before = len(cluster)
            # The list is extended while it is being iterated on purpose:
            # newly added sites are visited within the same pass.
            for site in cluster:
                cluster += [s for s in bonds[site] if s not in cluster]
            if len(cluster) == size_before:
                return cluster

    # The conventional standard structure is much easier to work
    # with.
    structure = SpacegroupAnalyzer(
        structure).get_conventional_standard_structure()

    # Noble gases don't have well-defined bonding radii. Returning here
    # also avoids the NameError the old code raised when a POSCAR was
    # requested for such a structure (no cluster exists in that case).
    noble_gases = ['He', 'Ne', 'Ar', 'Kr', 'Xe']
    if any(e.symbol in noble_gases for e in structure.composition):
        return 'noble gas'

    if len(structure.sites) < 45:
        structure.make_supercell(2)

    sites = structure.sites
    original_cluster = _grow_cluster(_bond_map(sites), sites[0])

    if len(original_cluster) == 0:
        # i.e. the cluster is a single atom.
        structure_type = 'molecular'
    elif len(original_cluster) == len(sites):
        # i.e. all atoms are bonded.
        structure_type = 'conventional'
    else:
        # If the cluster's composition is not equal to the
        # structure's overall composition, it is a heterogeneous
        # compound.
        cluster_composition_dict = {}
        for site in original_cluster:
            element = Element(site.specie)
            cluster_composition_dict[element] = (
                cluster_composition_dict.get(element, 0) + 1)
        cluster_formula = Composition.from_dict(
            cluster_composition_dict).reduced_formula

        if cluster_formula != structure.composition.reduced_formula:
            structure_type = 'heterogeneous'
        else:
            # Make a 2x2x2 supercell and recalculate the cluster's
            # new size. Only a cluster that became exactly 4x as big
            # is classified as layered; everything else is reported
            # as molecular.
            old_cluster_size = len(original_cluster)
            structure.make_supercell(2)
            sites = structure.sites
            new_cluster = _grow_cluster(_bond_map(sites), sites[0])
            if len(new_cluster) != 4 * old_cluster_size:
                structure_type = 'molecular'
            else:
                structure_type = 'layered'

    if write_poscar_from_cluster:
        Structure.from_sites(original_cluster).to('POSCAR', 'POSCAR')

    return structure_type
def get_structure_type(structure, tol=0.1, seed_index=0,
                       write_poscar_from_cluster=False):
    """
    This is a topology-scaling algorithm used to describe the
    periodicity of bonded clusters in a bulk structure.

    Args:
        structure (structure): Pymatgen structure object to classify.
        tol (float): Additional percent of atomic radii to allow
            for overlap, thereby defining bonds
            (0.1 = +10%, -0.1 = -10%)
        seed_index (int): Atom number to start the cluster.
        write_poscar_from_cluster (bool): Set to True to write a
            POSCAR file from the sites in the cluster. Nothing is
            written for noble-gas structures, because no cluster is
            built for them.

    Returns:
        string. "noble gas", "molecular", "layered" or "conventional".
        " heterogeneous" is appended if the cluster's composition is
        not equal to that of the overall structure. Note that this
        implementation never returns "chain": a cluster that does not
        grow exactly 4x in the doubled supercell is reported as
        "molecular".
    """

    def _find_cluster(struct):
        """Return the set of site indices reachable from `seed_index` via bonds."""
        # Distance matrix (rowA, columnB) shows distance between
        # atoms A and B, taking PBCs into account.
        distance_matrix = struct.distance_matrix

        # Fill diagonal with a large number, so the code knows that
        # each atom is not bonded to itself.
        np.fill_diagonal(distance_matrix, 100)

        # Rows (`radii`) and columns (`radii_column`) of radii.
        radii = [ELEMENT_RADII[site.species_string] for site in struct.sites]
        radii_column = np.array(radii)[np.newaxis].T
        # NOTE(review): `tol` only scales the column radius here, so the
        # bond criterion is not symmetric in the two atoms. Kept as-is to
        # preserve existing classifications - confirm whether
        # (radii + radii_column) * (1 + tol) was intended.
        radii_matrix = radii + radii_column * (1 + tol)

        # Entries below zero mean the two atoms are bonded; store that
        # as 1 (bonded) / 0 (not bonded).
        binary_matrix = ((distance_matrix - radii_matrix) < 0).astype(int)

        # Atoms bonded to the seed atom of the cluster.
        found = set(np.where(binary_matrix[seed_index] == 1)[0])
        frontier = set(found)
        while True:
            # All atoms, without duplicates, that are bonded to any
            # atom discovered in the previous round.
            neighbours = set()
            for n in frontier:
                neighbours.update(np.where(binary_matrix[n] == 1)[0])
            if neighbours.issubset(found):
                # No new atoms: the search is done.
                return found
            frontier = neighbours - found
            found.update(neighbours)

    # Get conventional structure to orthogonalize the lattice as
    # much as possible. A tolerance of 0.1 Angst. was suggested by
    # pymatgen developers.
    s = SpacegroupAnalyzer(structure, 0.1).get_conventional_standard_structure()

    # No cluster is built for structures containing noble gases.
    noble_gases = ["He", "Ne", "Ar", "Kr", "Xe", "Rn"]
    if any(e.symbol in noble_gases for e in structure.composition):
        return "noble gas"

    # make 2x2x2 supercell to ensure sufficient number of atoms
    # for cluster building.
    s.make_supercell(2)
    cluster = _find_cluster(s)

    heterogeneous = False
    if len(cluster) == 0:
        # i.e. the cluster is a single atom.
        cluster = [seed_index]  # Make sure it's not empty to write POSCAR.
        structure_type = "molecular"
    elif len(cluster) == len(s.sites):
        # i.e. all atoms are bonded.
        structure_type = "conventional"
    else:
        cluster_composition = Composition.from_dict(
            Counter([s[l].specie.name for l in list(cluster)]))
        if cluster_composition.reduced_formula != s.composition.reduced_formula:
            # i.e. the cluster does not have the same composition
            # as the overall crystal; therefore there are other
            # clusters of varying composition.
            heterogeneous = True

        old_cluster_size = len(cluster)
        # Increase structure to determine whether it is
        # layered or molecular, then perform the same kind
        # of cluster search as before.
        s.make_supercell(2)
        cluster = _find_cluster(s)
        if len(cluster) != 4 * old_cluster_size:
            structure_type = "molecular"
        else:
            structure_type = "layered"

    if heterogeneous:
        structure_type += " heterogeneous"

    if write_poscar_from_cluster:
        cluster_sites = [s.sites[n] for n in cluster]
        s.from_sites(cluster_sites).get_primitive_structure().to("POSCAR",
                                                                 "POSCAR")

    return structure_type
def get_structure_type(structure, write_poscar_from_cluster=False):
    """
    This is a topology-scaling algorithm used to describe the
    periodicity of bonded clusters in a bulk structure.

    Two sites count as bonded when their distance is below 1.1 times
    the sum of their atomic radii. The cluster bonded to the first
    site is grown until it stops changing and is then classified.

    Args:
        structure (structure): Pymatgen structure object to classify.
        write_poscar_from_cluster (bool): Set to True to write a
            POSCAR from the sites in the cluster. Nothing is written
            for noble-gas structures, because no cluster is built for
            them.

    Returns:
        string. One of 'noble gas', 'molecular', 'conventional',
        'heterogeneous' or 'layered'. Note that this implementation
        never returns 'chain': any cluster that does not grow exactly
        4x in the doubled supercell is reported as 'molecular'.
    """

    def _bond_map(sites):
        """Map every site to the list of sites it is bonded to."""
        bonds = {site: [] for site in sites}
        for i, site_1 in enumerate(sites):
            for site_2 in sites[i + 1:]:
                bond_cutoff = float(
                    Element(site_1.specie).atomic_radius
                    + Element(site_2.specie).atomic_radius) * 1.1
                if site_1.distance(site_2) < bond_cutoff:
                    bonds[site_1].append(site_2)
                    bonds[site_2].append(site_1)
        return bonds

    def _grow_cluster(bonds, seed):
        """Assimilate all atoms bonded to `seed`; stop when the cluster stops growing."""
        cluster = bonds[seed]
        while True:
            size_before = len(cluster)
            # The list is extended while it is being iterated on purpose:
            # newly added sites are visited within the same pass.
            for site in cluster:
                cluster += [s for s in bonds[site] if s not in cluster]
            if len(cluster) == size_before:
                return cluster

    # The conventional standard structure is much easier to work
    # with.
    structure = SpacegroupAnalyzer(
        structure).get_conventional_standard_structure()

    # Noble gases don't have well-defined bonding radii. Returning here
    # also avoids the NameError the old code raised when a POSCAR was
    # requested for such a structure (no cluster exists in that case).
    noble_gases = ['He', 'Ne', 'Ar', 'Kr', 'Xe']
    if any(e.symbol in noble_gases for e in structure.composition):
        return 'noble gas'

    if len(structure.sites) < 45:
        structure.make_supercell(2)

    sites = structure.sites
    original_cluster = _grow_cluster(_bond_map(sites), sites[0])

    if len(original_cluster) == 0:
        # i.e. the cluster is a single atom.
        structure_type = 'molecular'
    elif len(original_cluster) == len(sites):
        # i.e. all atoms are bonded.
        structure_type = 'conventional'
    else:
        # If the cluster's composition is not equal to the
        # structure's overall composition, it is a heterogeneous
        # compound.
        cluster_composition_dict = {}
        for site in original_cluster:
            element = Element(site.specie)
            cluster_composition_dict[element] = (
                cluster_composition_dict.get(element, 0) + 1)
        cluster_formula = Composition.from_dict(
            cluster_composition_dict).reduced_formula

        if cluster_formula != structure.composition.reduced_formula:
            structure_type = 'heterogeneous'
        else:
            # Make a 2x2x2 supercell and recalculate the cluster's
            # new size. Only a cluster that became exactly 4x as big
            # is classified as layered; everything else is reported
            # as molecular.
            old_cluster_size = len(original_cluster)
            structure.make_supercell(2)
            sites = structure.sites
            new_cluster = _grow_cluster(_bond_map(sites), sites[0])
            if len(new_cluster) != 4 * old_cluster_size:
                structure_type = 'molecular'
            else:
                structure_type = 'layered'

    if write_poscar_from_cluster:
        Structure.from_sites(original_cluster).to('POSCAR', 'POSCAR')

    return structure_type
def run(mpfile, include_cifs=True, nmax=None, dup_check_test_site=True):
    """Fill ``mpfile`` with phase data (and optionally structures) keyed by mp-id.

    Reads four JSON files whose paths are listed under the 'input' entry of
    the first top-level section of ``mpfile.document`` (paths are resolved
    relative to this module's directory), looks up contributions that
    already exist for the file's DOI, matches every phase entry to its hull
    state and mp-ids, and adds the resulting data to ``mpfile``. Structure
    errors are recorded in 'errors.json' next to this module.

    NOTE: this function uses Python 2 print statements.

    Args:
        mpfile: MPFile-like object; must provide ``document``, ``hdata``,
            ``ids``, ``add_structure``, ``add_hierarchical_data`` and
            ``insert_id``.
        include_cifs (bool): when True, structures are attached as well.
        nmax (int or None): when not None, mp-ids that already have a
            contribution are skipped, and adding ids for a phase stops once
            ``len(mpfile.ids) >= nmax-1``.
        dup_check_test_site (bool): when False, existing contributions are
            only looked up with ``test_site=False``.
    """
    from pymatgen.core.composition import Composition
    from pymatgen.core.structure import Structure

    # The 'input' entry is removed (popped) from the document while reading.
    data_input = mpfile.document[mp_level01_titles[0]].pop('input')
    phase_names = mpfile.hdata.general['info']['phase_names']
    dir_path = os.path.dirname(os.path.realpath(__file__))
    for k in data_input.keys():
        data_input[k] = os.path.join(dir_path, data_input[k])

    # Map mp-id -> contribution id for contributions already stored under
    # this DOI.
    doi = mpfile.hdata.general['doi']
    existing_mpids = {}
    for b in [False, True]:
        with Mno2PhaseSelectionRester(test_site=b) as mpr:
            for doc in mpr.query_contributions(criteria={'content.doi': doi}):
                existing_mpids[doc['mp_cat_id']] = doc['_id']
        if not dup_check_test_site:
            break

    with open(data_input['formatted_entries'], "r") as fin:
        mp_contrib_phases = json.loads(fin.read())
    with open(data_input['hull_entries'], "r") as fin:
        hull_states = json.loads(fin.read())
    with open(data_input['mpid_existing'], 'r') as fin:
        mp_dup = json.loads(fin.read())
    with open(data_input['mpid_new'], 'r') as fin:
        mp_cmp = json.loads(fin.read())

    ################################################################################################################
    # add unique structures first (special cases)
    ################################################################################################################

    if include_cifs:
        for hstate in hull_states:
            if 'other' == hstate['phase']:
                c = Composition.from_dict(hstate['c'])
                s = Structure.from_dict(hstate['s'])
                for mpid in mpfile.ids:
                    formula = mpfile.hdata[mpid]['data']['Formula']
                    if c.almost_equals(Composition(formula)):
                        if nmax is not None and mpid in existing_mpids:
                            mpfile.document.pop(mpid) # skip duplicates
                            break
                        try:
                            mpfile.add_structure(s, identifier=mpid)
                            print formula, 'added to', mpid
                        except Exception as ex:
                            print 'tried to add structure to', mpid, 'but', str(ex)
                        if mpid in existing_mpids:
                            cid = existing_mpids[mpid]
                            mpfile.insert_id(mpid, cid)
                            print cid, 'inserted to update', mpid
                        # only the first mp-id with a matching formula is used
                        break

    # "phase": 'postspinel-NaMn2O4', "Formula": 'Na0.5MnO2',
    # "dHf (eV/mol)": -1.415, "dHh (eV/mol)": '--', "Ground state?": 'Y',

    ################################################################################################################
    # Get mp-ids for all entries based on matching the VASP directory path names
    # Paths are different in the existing and new mp-id dictionary, so processing has to be independent
    ################################################################################################################

    print 'get all mp-ids based on VASP directory paths ...'

    # Append the hull state's path and structure to each phase entry; the
    # length check (< 6) prevents appending a second match.
    # NOTE(review): the indices used below (phase[4] = path, phase[5] =
    # structure, phase[6] = mp-ids) assume every formatted entry starts with
    # exactly four fields and finds a hull match - confirm against the data.
    for framework, fdat in mp_contrib_phases.items():
        for i, phase in enumerate(fdat):
            c = Composition(phase[0])
            for hstate in hull_states:
                if phase_names[framework] == hstate['phase'] and \
                        c.almost_equals(Composition.from_dict(hstate['c'])) and \
                        len(mp_contrib_phases[framework][i]) < 6:
                    mp_contrib_phases[framework][i].append(hstate['path'])
                    mp_contrib_phases[framework][i].append(hstate['s'])

    # Collect the mp-ids of each phase by comparing normalized directory
    # paths; the list of ids is appended to the phase entry.
    for framework, fdat in mp_contrib_phases.items():
        for i, phase in enumerate(fdat):
            match_path = phase[4].replace('all_states/', '')
            mp_ids = []
            for path, ids in mp_dup.items():
                mp_path = path.replace('/Users/patrick/Downloads/20160710_MPContrib_MnO2_DK/', '').replace(
                    '/3.double_relax/CONTCAR', '')
                if match_path == mp_path:
                    mp_ids.extend(ids)
            for path, id_dat in mp_cmp.items():
                mp_path = path.replace('20160725_MnO2_DK_Cifs/20160710_MPContrib_MnO2_DK-', '').replace(
                    '-3.double_relax-CONTCAR.cif', '').replace('-', '/')
                if match_path == mp_path:
                    if 'mp_id' in id_dat.keys():
                        mp_ids.append(id_dat['mp_id'])

            mp_contrib_phases[framework][i].append(mp_ids)

    ################################################################################################################
    # For structures that have mp-ids, add them to the contribution dictionary.
    # For those that don't, run a separate dictionary to keep track of them
    ################################################################################################################

    print 'add structures with mp-ids to contribution ...'

    # phases without any mp-id, keyed by their normalized path
    no_id_dict = {}
    # Previously recorded structure errors are loaded, extended and written
    # back at the end; the file must already exist.
    errors_file = os.path.join(os.path.dirname(__file__), 'errors.json')
    with open(errors_file, 'r') as f:
        errors = json.load(f)

    for framework, fdat in mp_contrib_phases.items():
        for phase in fdat:
            d = RecursiveDict()
            d["Phase"] = framework
            d["Formula"] = phase[0]
            # float() is only used as a validity check; any failure leads
            # to the '--' placeholder.
            try:
                float(phase[1])
                d["dHf"] = '{} eV/mol'.format(phase[1])
            except:
                d["dHf"] = '--'
            try:
                float(phase[3])
                d["dHh"] = '{} eV/mol'.format(phase[3])
            except:
                d["dHh"] = '--'
            d["GS"] = phase[2]
            if len(phase[6]) == 0:
                no_id_dict[phase[4].replace('all_states/', '')] = d
            for mpid in phase[6]:
                if nmax is not None:
                    if len(mpfile.ids) >= nmax-1:
                        break
                    elif mpid in existing_mpids:
                        continue # skip duplicates
                mpfile.add_hierarchical_data(RecursiveDict({'data': d}), identifier=mpid)
                print 'added', mpid
                if mpid in existing_mpids:
                    cid = existing_mpids[mpid]
                    mpfile.insert_id(mpid, cid)
                    print cid, 'inserted to update', mpid
                if include_cifs:
                    try:
                        mpfile.add_structure(phase[5], identifier=mpid)
                        print framework, phase[0], 'added to', mpid
                    except ValueError as ex:
                        print 'tried to add structure to', mpid, 'but', str(ex)
                        errors[mpid] = str(ex)

    with open(errors_file, 'w') as f:
        json.dump(errors, f)

    print """ DONE. {} structures to submit. {} structures do not have mp-ids. {} structures with mp-ids have errors. """.format(len(mpfile.ids), len(no_id_dict), len(errors))
def plot_ion_hull_and_voltages(ion, fmt='pdf'):
    """
    Plots the phase diagram between the pure material and pure ion,
    Connecting the points on the convex hull of the phase diagram.

    The pure material is read from 'vasprun.xml' and 'POSCAR' in the
    current directory; every converged subdirectory contributes one
    intercalated composition. The figure is saved as
    '<ion>_hull.<fmt>' in the current directory.

    Args:
        ion (str): name of atom that was intercalated, e.g. 'Li'.
            Must be a key of the reference-energy table below
            ('Li', 'Mg' or 'Al').
        fmt (str): matplotlib format style. Check the matplotlib
            docs for options.
    """

    # Calculated with the relax() function in
    # mat2d.stability.startup. If you are using other input
    # parameters, you need to recalculate these values!
    ion_ev_fu = {'Li': -1.7540797, 'Mg': -1.31976062, 'Al': -3.19134607}

    energy = Vasprun('vasprun.xml').final_energy
    composition = Structure.from_file('POSCAR').composition

    # Get the formula (with single-digit integers preceded by a '_').
    formula_chars = []
    for char in composition.reduced_formula:
        try:
            int(char)
        except ValueError:
            # Not a digit: keep the character as it is. Only ValueError
            # is caught so that unrelated errors are not hidden.
            formula_chars.append(char)
        else:
            formula_chars.append('_{}'.format(char))
    twod_formula = ''.join(formula_chars)

    twod_ev_fu = energy / composition.get_reduced_composition_and_factor()[1]

    data = [(0, 0, 0, twod_ev_fu)]  # (at% ion, n_ions, E_F, abs_energy)
    dirs = [entry for entry in os.listdir(os.getcwd())
            if os.path.isdir(entry)]
    for directory in dirs:
        if is_converged(directory):
            os.chdir(directory)
            try:
                energy = Vasprun('vasprun.xml').final_energy
                composition = Structure.from_file('POSCAR').composition
                ion_fraction = composition.get_atomic_fraction(ion)

                # Composition of the host alone (ion amount set to 0),
                # used to count host formula units.
                no_ion_comp_dict = composition.as_dict()
                no_ion_comp_dict.update({ion: 0})
                no_ion_comp = Composition.from_dict(no_ion_comp_dict)

                n_twod_fu = no_ion_comp.get_reduced_composition_and_factor()[1]
                n_ions = composition[ion] / n_twod_fu

                E_F = ((energy - composition[ion] * ion_ev_fu[ion] -
                        twod_ev_fu * n_twod_fu) / composition.num_atoms)

                data.append((ion_fraction, n_ions, E_F, energy / n_twod_fu))
            finally:
                # Always step back out, so that a failure in one
                # subdirectory does not leave the process inside it.
                os.chdir('../')
    data.append((1, 1, 0, ion_ev_fu[ion]))  # Pure ion

    sorted_data = sorted(data, key=operator.itemgetter(0))

    # Determine which compositions are on the convex hull.
    energy_profile = np.array([[item[0], item[2]] for item in sorted_data
                               if item[2] <= 0])
    hull = ConvexHull(energy_profile)
    convex_ion_fractions = [energy_profile[vertex, 0]
                            for vertex in hull.vertices]
    convex_formation_energies = [energy_profile[vertex, 1]
                                 for vertex in hull.vertices]

    # Move the first hull vertex to the end of both lists.
    convex_ion_fractions.append(convex_ion_fractions.pop(0))
    convex_formation_energies.append(convex_formation_energies.pop(0))

    concave_ion_fractions = [pt[0] for pt in sorted_data
                             if pt[0] not in convex_ion_fractions]
    concave_formation_energies = [pt[2] for pt in sorted_data
                                  if pt[0] not in convex_ion_fractions]

    # Voltage steps between consecutive hull compositions; `k` is the
    # index of the previous hull point.
    voltage_profile = []
    k = 0
    for i in range(1, len(sorted_data) - 1):
        if sorted_data[i][0] in convex_ion_fractions:
            voltage = -(((sorted_data[i][3] - sorted_data[k][3]) -
                         (sorted_data[i][1] - sorted_data[k][1]) *
                         ion_ev_fu[ion]) /
                        (sorted_data[i][1] - sorted_data[k][1]))
            voltage_profile.append((sorted_data[k][0], voltage))
            voltage_profile.append((sorted_data[i][0], voltage))
            k = i

    voltage_profile.append((voltage_profile[-1][0], 0))
    voltage_profile.append((1, 0))

    voltage_profile_x = [tup[0] for tup in voltage_profile]
    voltage_profile_y = [tup[1] for tup in voltage_profile]

    ax = plt.figure(figsize=(14, 10)).gca()

    ax.plot([0, 1], [0, 0], 'k--')
    ax.plot(convex_ion_fractions, convex_formation_energies, 'b-',
            marker='o', markersize=12, markeredgecolor='none')
    ax.plot(concave_ion_fractions, concave_formation_energies, 'r',
            marker='o', linewidth=0, markersize=12, markeredgecolor='none')

    ax2 = ax.twinx()
    ax2.plot(voltage_profile_x, voltage_profile_y, 'k-', marker='o')

    ax.text(0, 0.002, r'$\mathrm{%s}$' % twod_formula, family='serif',
            size=24)
    ax.text(0.99, 0.002, r'$\mathrm{%s}$' % ion, family='serif', size=24,
            horizontalalignment='right')

    ax.set_xticklabels(ax.get_xticks(), family='serif', size=20)
    ax.set_yticklabels(ax.get_yticks(), family='serif', size=20)
    ax2.set_yticklabels(ax2.get_yticks(), family='serif', size=20)

    ax.set_xlabel('at% {}'.format(ion), family='serif', size=28)
    ax.set_ylabel(r'$\mathrm{E_F\/(eV/atom)}$', size=28)

    ax2.yaxis.set_label_position('right')
    if ion == 'Li':
        ax2.set_ylabel(r'$\mathrm{Potential\/vs.\/Li/Li^+\/(V)}$', size=28)
    elif ion == 'Mg':
        ax2.set_ylabel(r'$\mathrm{Potential\/vs.\/Mg/Mg^{2+}\/(V)}$', size=28)
    elif ion == 'Al':
        ax2.set_ylabel(r'$\mathrm{Potential\/vs.\/Al/Al^{3+}\/(V)}$', size=28)

    plt.savefig('{}_hull.{}'.format(ion, fmt), transparent=True)
def _find_bonded_cluster(s, tol, seed_index):
    """
    Collect the indices of every site connected to the seed site through
    an unbroken chain of bonds.

    Two sites count as bonded when their distance is smaller than the
    corresponding entry of the radii matrix built from ELEMENT_RADII.

    Args:
        s (structure): Pymatgen structure object to search.
        tol (float): Fractional scaling applied to the radius term
            of the bonding threshold (0.1 = +10%, -0.1 = -10%).
        seed_index (int): Index of the site the search starts from.

    Returns:
        set. Indices of all sites reachable from the seed. Empty when
        the seed site has no bonded neighbors.
    """
    # Distance matrix (rowA, columnB) shows distance between
    # atoms A and B, taking PBCs into account.
    distance_matrix = s.distance_matrix

    # Fill diagonal with a large number, so the code knows that
    # each atom is not bonded to itself.
    np.fill_diagonal(distance_matrix, 100)

    # Rows (`radii`) and columns (`radiiT`) of radii.
    radii = [ELEMENT_RADII[site.species_string] for site in s.sites]
    radiiT = np.array(radii)[np.newaxis].T
    # NOTE(review): (1 + tol) scales only the column radius, not the
    # sum of both radii, so the matrix is not symmetric. Kept as-is to
    # preserve existing classifications -- confirm whether intended.
    radii_matrix = radii + radiiT * (1 + tol)

    # True wherever the distance is below the bonding threshold.
    bonded = (distance_matrix - radii_matrix) < 0

    # Breadth-first growth: start with the seed's direct neighbors and
    # keep adding neighbors of newly found sites until nothing new
    # turns up.
    cluster = set(np.where(bonded[seed_index])[0])
    frontier = set(cluster)
    while frontier:
        neighbors = set()
        for n in frontier:
            neighbors.update(np.where(bonded[n])[0])
        frontier = neighbors - cluster
        cluster.update(neighbors)
    return cluster


def get_structure_type(structure, tol=0.1, seed_index=0,
                       write_poscar_from_cluster=False):
    """
    This is a topology-scaling algorithm used to describe the
    periodicity of bonded clusters in a bulk structure.

    Args:
        structure (structure): Pymatgen structure object to classify.
        tol (float): Additional percent of atomic radii to allow
            for overlap, thereby defining bonds
            (0.1 = +10%, -0.1 = -10%)
        seed_index (int): Atom number to start the cluster.
        write_poscar_from_cluster (bool): Set to True to write a
            POSCAR file from the sites in the cluster.

    Returns:
        string. "noble gas" when the composition contains a noble gas
        (no cluster is built in that case), otherwise "molecular",
        "layered" or "conventional". " heterogeneous" is appended if
        the cluster's composition is not equal to that of the overall
        structure. Note that this implementation never returns a
        separate label for chains: any cluster that does not grow
        fourfold in the second search is reported as "molecular".
    """

    # Get conventional structure to orthogonalize the lattice as
    # much as possible. A tolerance of 0.1 Angst. was suggested by
    # pymatgen developers.
    s = SpacegroupAnalyzer(structure, 0.1).get_conventional_standard_structure()

    noble_gases = ("He", "Ne", "Ar", "Kr", "Xe", "Rn")
    if any(e.symbol in noble_gases for e in structure.composition):
        # No cluster exists on this path, so there is nothing to
        # analyze or write out.
        return "noble gas"

    heterogeneous = False

    # make 2x2x2 supercell to ensure sufficient number of atoms
    # for cluster building.
    s.make_supercell(2)
    cluster = _find_bonded_cluster(s, tol, seed_index)

    if len(cluster) == 0:  # i.e. the cluster is a single atom.
        cluster = [seed_index]  # Make sure it's not empty to write POSCAR.
        structure_type = "molecular"

    elif len(cluster) == len(s.sites):  # i.e. all atoms are bonded.
        structure_type = "conventional"

    else:
        cmp = Composition.from_dict(
            Counter([s[l].specie.name for l in list(cluster)]))
        if cmp.reduced_formula != s.composition.reduced_formula:
            # i.e. the cluster does not have the same composition
            # as the overall crystal; therefore there are other
            # clusters of varying composition.
            heterogeneous = True

        old_cluster_size = len(cluster)

        # Increase structure to determine whether it is
        # layered or molecular, then perform the same kind
        # of cluster search as before.
        s.make_supercell(2)
        cluster = _find_bonded_cluster(s, tol, seed_index)

        # Doubling the cell along every axis quadruples a cluster
        # that is periodic in exactly two directions.
        if len(cluster) == 4 * old_cluster_size:
            structure_type = "layered"
        else:
            structure_type = "molecular"

    if heterogeneous:
        structure_type += " heterogeneous"

    if write_poscar_from_cluster:
        cluster_sites = [s.sites[n] for n in cluster]
        s.from_sites(cluster_sites).get_primitive_structure().to(
            "POSCAR", "POSCAR")

    return structure_type
def _weighted_average(row_a, weight_a, row_b, weight_b, attr):
    """Return weight_a * row_a.<attr> + weight_b * row_b.<attr>."""
    return weight_a * getattr(row_a, attr) + weight_b * getattr(row_b, attr)


def material_load_binary(d, sep='-', p=prop):
    """
    Build one feature dictionary per proportion pair for a binary system.

    For every index i the proportions used are p[i] for the first
    element and p[-(i + 1)] for the second, i.e. `p` is paired with its
    own mirror image. Each dictionary holds the hull energy of that
    composition together with proportion-weighted element properties
    read from the module-level `elements` table.

    Args:
        d (str): The two element symbols joined by `sep`, e.g. "Fe-O".
        sep (str): Separator used to split `d`.
        p (sequence): Proportions to evaluate.

    Returns:
        tuple. (list of feature dictionaries, system label). The label
        is always the two symbols joined by '-', regardless of `sep`.
        Proportion pairs whose features could not be computed are left
        out of the list (a warning is issued for each).
    """
    import warnings

    return_data = []
    d = d.split(sep)
    el_a, el_b = d[0], d[1]
    # Computed once up front so the label can be returned even when `p`
    # is empty or every proportion pair fails.
    system = el_a + '-' + el_b

    # Create a phase diagram object for the following system:
    entry = mp.get_entries_in_chemsys(
        [el_a, el_b])  # gets the entries of the chemical system
    phase_diagram = PhaseDiagram(entry)  # creates a phasediagram object
    pd_analyse = PDAnalyzer(phase_diagram)  # phase diagram analysis object

    # Get the features for various proportions using the
    # get_hull_energy method.
    for i, prop_a in enumerate(p):
        prop_b = p[-(i + 1)]
        temp_data = {}
        try:
            temp_data['system'] = system
            temp_data['A'] = el_a
            temp_data['B'] = el_b
            temp_data[el_a + '_prop'] = prop_a
            temp_data[el_b + '_prop'] = prop_b
            temp_data['formation_energy'] = pd_analyse.get_hull_energy(
                Composition.from_dict({el_a: prop_a, el_b: prop_b}))

            # Element property extraction
            row_a = elements.loc[el_a]
            row_b = elements.loc[el_b]

            def avg(attr):
                return _weighted_average(row_a, prop_a, row_b, prop_b, attr)

            temp_data['avg_atomic_mass'] = avg('mass')
            temp_data['avg_row'] = avg('period')
            temp_data['avg_col'] = avg('group')
            # Max difference in atomic number
            temp_data['max_z_diff'] = abs(row_a.z - row_b.z)
            temp_data['avg_z'] = avg('z')
            # Max difference in atomic radius
            temp_data['max_radius_diff'] = abs(
                row_a.atomic_radii - row_b.atomic_radii)
            temp_data['avg_radius'] = avg('atomic_radii')
            # Max difference in electronegativity
            temp_data['max_en_diff'] = abs(
                row_a.electronegativity - row_b.electronegativity)
            # Average electronegativity
            temp_data['avg_en'] = avg('electronegativity')

            temp_data['avg_s_elec'] = avg('s_elec')
            temp_data['avg_p_elec'] = avg('p_elec')
            temp_data['avg_d_elec'] = avg('d_elec')
            temp_data['avg_f_elec'] = avg('f_elec')

            # Share of each orbital type in the averaged electron count.
            temp_sum = (temp_data['avg_s_elec'] + temp_data['avg_p_elec'] +
                        temp_data['avg_d_elec'] + temp_data['avg_f_elec'])
            temp_data['prop_s_elec'] = temp_data['avg_s_elec'] / temp_sum
            temp_data['prop_p_elec'] = temp_data['avg_p_elec'] / temp_sum
            temp_data['prop_d_elec'] = temp_data['avg_d_elec'] / temp_sum
            temp_data['prop_f_elec'] = temp_data['avg_f_elec'] / temp_sum
        except Exception as exc:
            # Best effort: a failing proportion pair is skipped, but no
            # longer silently, and KeyboardInterrupt/SystemExit are no
            # longer swallowed.
            warnings.warn(
                'Skipping proportions ({}, {}) for {}: {}'.format(
                    prop_a, prop_b, system, exc))
            continue
        return_data.append(temp_data)

    return return_data, system
def disordered_formula(disordered_struct, symbols=('x', 'y', 'z'), fmt='plain'):
    """
    Build a formula such as AxB1-x (x=0.5) for a disordered structure.

    Only structures with a single kind of disordered site are
    supported at present.

    Args:
        disordered_struct: a disordered structure
        symbols: a tuple of characters to use for subscripts, by
            default this is ('x', 'y', 'z') but if you have more than
            three disordered species more symbols will need to be added
        fmt (str): 'plain', 'HTML' or 'LaTeX'

    Returns (str): a disordered formula string

    Raises:
        ValueError: if the structure is ordered, if more than one type
            of disordered site is present, if there are more disordered
            species than symbols, or if `fmt` is not supported.
    """

    # This lives in string utils rather than on Composition because
    # site occupancies are needed, so the full structure has to be
    # passed in (alternatively this could be a method on Structure).

    from pymatgen.core.composition import Composition
    from pymatgen.core.periodic_table import get_el_sp

    if disordered_struct.is_ordered:
        raise ValueError("Structure is not disordered, "
                         "so disordered formula not defined.")

    site_compositions = {site.species_and_occu
                         for site in disordered_struct
                         if not site.is_ordered}

    if len(site_compositions) > 1:
        # this probably won't happen too often
        raise ValueError("Ambiguous how to define disordered "
                         "formula when more than one type of disordered "
                         "site is present.")
    site_composition = site_compositions.pop()

    disordered_species = {str(sp) for sp, _ in site_composition.items()}

    if len(disordered_species) > len(symbols):
        # this probably won't happen too often either
        raise ValueError("Not enough symbols to describe disordered composition: "
                         "{}".format(symbols))
    # The last disordered species gets the remainder expression, so one
    # symbol fewer than the number of disordered species is used.
    symbols = list(symbols)[0:len(disordered_species) - 1]

    # sort by electronegativity, as per composition
    el_amounts = disordered_struct.composition.get_el_amt_dict().items()
    comp = sorted(el_amounts, key=lambda pair: get_el_sp(pair[0]).X)

    total_disordered_occu = sum(
        occu for sp, occu in comp if str(sp) in disordered_species)

    # Composition with the disordered species lumped into a single
    # placeholder entry, used to get the common factor.
    factor_comp = disordered_struct.composition.as_dict()
    factor_comp['X'] = total_disordered_occu
    for sp in disordered_species:
        del factor_comp[str(sp)]
    factor = Composition.from_dict(
        factor_comp).get_reduced_formula_and_factor()[1]

    total_disordered_occu /= factor

    total_label = formula_double_format(total_disordered_occu,
                                        ignore_ones=False)
    remainder = "{}-{}".format(total_label, '-'.join(symbols))

    disordered_comp = []
    variable_map = {}
    unassigned_symbols = iter(symbols)
    for sp, occu in comp:
        sp = str(sp)
        if sp not in disordered_species:
            disordered_comp.append((sp, formula_double_format(occu/factor)))
            continue
        symbol = next(unassigned_symbols, None)
        if symbol is None:
            # All symbols are taken: this species carries the remainder.
            disordered_comp.append((sp, remainder))
        else:
            disordered_comp.append((sp, symbol))
            variable_map[symbol] = occu / total_disordered_occu / factor

    if fmt == 'plain':
        sub_start = sub_end = ""
    elif fmt == 'LaTeX':
        sub_start, sub_end = "_{", "}"
    elif fmt == 'HTML':
        sub_start, sub_end = "<sub>", "</sub>"
    else:
        raise ValueError("Unsupported output format, "
                         "choose from: LaTeX, HTML, plain")

    pieces = []
    for sp, occu in disordered_comp:
        pieces.append(sp)
        if occu:  # can be empty string if 1
            pieces.extend((sub_start, occu, sub_end))
    pieces.append(" ")
    pieces.extend("{}={} ".format(k, formula_double_format(v))
                  for k, v in variable_map.items())

    # Drop the trailing space.
    return "".join(map(str, pieces))[0:-1]
def plot_ion_hull_and_voltages(ion, fmt='pdf'):
    """
    Plots the phase diagram between the pure material and pure ion,
    connecting the points on the convex hull of the phase diagram, and
    overlays the voltage profile derived from the hull points.

    Reads 'vasprun.xml' and 'POSCAR' from the current directory (the
    pure material) and from every converged subdirectory (intercalated
    structures), then saves the figure as '<ion>_hull.<fmt>' in the
    current directory. The working directory is never changed.

    Args:
        ion (str): name of atom that was intercalated, e.g. 'Li'.
            Must be one of 'Li', 'Mg' or 'Al'; other values raise
            KeyError because no reference energy is tabulated.
        fmt (str): matplotlib format style. Check the matplotlib
            docs for options.
    """

    # Calculated with the relax() function in
    # twod_materials.stability.startup. If you are using other input
    # parameters, you need to recalculate these values!
    ion_ev_fu = {'Li': -1.7540797, 'Mg': -1.31976062, 'Al': -3.19134607}

    energy = Vasprun('vasprun.xml').final_energy
    composition = Structure.from_file('POSCAR').composition

    # Get the formula (with single-digit integers preceded by a '_').
    twod_formula = ''.join(
        '_{}'.format(char) if char.isdigit() else char
        for char in composition.reduced_formula)

    twod_ev_fu = energy / composition.get_reduced_composition_and_factor()[1]

    data = [(0, 0, 0, twod_ev_fu)]  # (at% ion, n_ions, E_F, abs_energy)

    for directory in os.listdir(os.getcwd()):
        if not os.path.isdir(directory) or not is_converged(directory):
            continue

        # Read through joined paths instead of chdir-ing in and out, so
        # a parsing failure cannot leave the process in a subdirectory.
        energy = Vasprun(
            os.path.join(directory, 'vasprun.xml')).final_energy
        composition = Structure.from_file(
            os.path.join(directory, 'POSCAR')).composition
        ion_fraction = composition.get_atomic_fraction(ion)

        # Host composition with the ion amount zeroed, used to count
        # formula units of the 2D material.
        no_ion_comp_dict = composition.as_dict()
        no_ion_comp_dict[ion] = 0
        no_ion_comp = Composition.from_dict(no_ion_comp_dict)

        n_twod_fu = no_ion_comp.get_reduced_composition_and_factor()[1]
        n_ions = composition[ion] / n_twod_fu
        E_F = (
            (energy - composition[ion] * ion_ev_fu[ion] -
             twod_ev_fu * n_twod_fu) / composition.num_atoms
        )

        data.append((ion_fraction, n_ions, E_F, energy / n_twod_fu))

    data.append((1, 1, 0, ion_ev_fu[ion]))  # Pure ion

    sorted_data = sorted(data, key=operator.itemgetter(0))

    # Determine which compositions are on the convex hull.
    energy_profile = np.array([[item[0], item[2]] for item in sorted_data
                               if item[2] <= 0])
    hull = ConvexHull(energy_profile)
    convex_ion_fractions = [
        energy_profile[vertex, 0] for vertex in hull.vertices]
    convex_formation_energies = [
        energy_profile[vertex, 1] for vertex in hull.vertices]

    # Rotate the vertex lists by one so the first vertex becomes the last.
    convex_ion_fractions.append(convex_ion_fractions.pop(0))
    convex_formation_energies.append(convex_formation_energies.pop(0))

    concave_ion_fractions = [
        pt[0] for pt in sorted_data if pt[0] not in convex_ion_fractions]
    concave_formation_energies = [
        pt[2] for pt in sorted_data if pt[0] not in convex_ion_fractions]

    # Step-wise voltage profile: one plateau between each pair of
    # consecutive hull points (k is the previous hull point).
    voltage_profile = []
    k = 0
    for i in range(1, len(sorted_data) - 1):
        if sorted_data[i][0] in convex_ion_fractions:
            delta_ions = sorted_data[i][1] - sorted_data[k][1]
            delta_energy = sorted_data[i][3] - sorted_data[k][3]
            voltage = -(
                (delta_energy - delta_ions * ion_ev_fu[ion]) / delta_ions
            )
            voltage_profile.append((sorted_data[k][0], voltage))
            voltage_profile.append((sorted_data[i][0], voltage))
            k = i

    # Drop to zero at the last hull point. sorted_data[k][0] equals the
    # x of the last plateau when one exists and stays valid when no
    # interior hull point was found.
    voltage_profile.append((sorted_data[k][0], 0))
    voltage_profile.append((1, 0))

    voltage_profile_x = [tup[0] for tup in voltage_profile]
    voltage_profile_y = [tup[1] for tup in voltage_profile]

    ax = plt.figure(figsize=(14, 10)).gca()

    ax.plot([0, 1], [0, 0], 'k--')
    ax.plot(convex_ion_fractions, convex_formation_energies, 'b-',
            marker='o', markersize=12, markeredgecolor='none')
    ax.plot(concave_ion_fractions, concave_formation_energies, 'r',
            marker='o', linewidth=0, markersize=12, markeredgecolor='none')

    ax2 = ax.twinx()
    ax2.plot(voltage_profile_x, voltage_profile_y, 'k-', marker='o')

    ax.text(0, 0.002, r'$\mathrm{%s}$' % twod_formula, family='serif',
            size=24)
    ax.text(0.99, 0.002, r'$\mathrm{%s}$' % ion, family='serif', size=24,
            horizontalalignment='right')

    ax.set_xticklabels(ax.get_xticks(), family='serif', size=20)
    ax.set_yticklabels(ax.get_yticks(), family='serif', size=20)
    ax2.set_yticklabels(ax2.get_yticks(), family='serif', size=20)

    ax.set_xlabel('at% {}'.format(ion), family='serif', size=28)
    ax.set_ylabel(r'$\mathrm{E_F\/(eV/atom)}$', size=28)

    ax2.yaxis.set_label_position('right')
    potential_labels = {
        'Li': r'$\mathrm{Potential\/vs.\/Li/Li^+\/(V)}$',
        'Mg': r'$\mathrm{Potential\/vs.\/Mg/Mg^{2+}\/(V)}$',
        'Al': r'$\mathrm{Potential\/vs.\/Al/Al^{3+}\/(V)}$',
    }
    potential_label = potential_labels.get(ion)
    if potential_label is not None:
        ax2.set_ylabel(potential_label, size=28)

    plt.savefig('{}_hull.{}'.format(ion, fmt), transparent=True)
def run(mpfile, include_cifs=True):
    """
    Attach phase data (and optionally structures) to the entries of an
    MPFile, using four JSON input files named in its general data.

    The file names under data['input'] are resolved relative to the
    directory of this module; the 'input' entry is removed from the
    MPFile data in the process.

    Args:
        mpfile: object providing `hdata`, `ids`, `add_structure` and
            `add_hierarchical_data`.
        include_cifs (bool): Set to True to also add structures.

    Returns:
        string. 'DONE. <n> do not have mp-ids!' where <n> is the number
        of phases for which no mp-id was found.
    """
    data = mpfile.hdata.general['data']
    phase_names = data['phase_names']
    data_input = data.pop('input')
    dir_path = os.path.dirname(os.path.realpath(__file__))
    for k in list(data_input):
        data_input[k] = os.path.join(dir_path, data_input[k])

    with open(data_input['formatted_entries'], "r") as fin:
        mp_contrib_phases = json.load(fin)
    with open(data_input['hull_entries'], "r") as fin:
        hull_states = json.load(fin)
    with open(data_input['mpid_existing'], 'r') as fin:
        mp_dup = json.load(fin)
    with open(data_input['mpid_new'], 'r') as fin:
        mp_cmp = json.load(fin)

    ###########################################################################
    # add unique structures first (special cases)
    ###########################################################################
    if include_cifs:
        for hstate in hull_states:
            if 'other' == hstate['phase']:
                c = Composition.from_dict(hstate['c'])
                s = Structure.from_dict(hstate['s'])
                for mpid in mpfile.ids:
                    formula = mpfile.hdata[mpid]['Formula']
                    if c.almost_equals(Composition(formula)):
                        # Single-argument print() emits the same text
                        # on Python 2 and Python 3.
                        try:
                            mpfile.add_structure(s, identifier=mpid)
                            print('%s added to %s' % (formula, mpid))
                        except Exception as ex:
                            print('tried to add structure to %s but %s'
                                  % (mpid, str(ex)))
                        # Only the first matching id receives the structure.
                        break

    # "phase": 'postspinel-NaMn2O4', "Formula": 'Na0.5MnO2',
    # "dHf (eV/mol)": -1.415, "dHh (eV/mol)": '--', "Ground state?": 'Y',

    ###########################################################################
    # Get mp-ids for all entries based on matching the VASP directory path
    # names. Paths are different in the existing and new mp-id dictionary, so
    # processing has to be independent
    ###########################################################################

    # Each phase list is extended in place: index 4 becomes the hull
    # path, index 5 the structure dict and (below) index 6 the mp-ids.
    for framework, fdat in mp_contrib_phases.items():
        for phase in fdat:
            c = Composition(phase[0])
            for hstate in hull_states:
                if phase_names[framework] == hstate['phase'] and \
                        c.almost_equals(Composition.from_dict(hstate['c'])) and \
                        len(phase) < 6:
                    phase.append(hstate['path'])
                    phase.append(hstate['s'])

    for framework, fdat in mp_contrib_phases.items():
        for phase in fdat:
            # NOTE(review): phase[4] only exists if a hull state matched
            # above; an unmatched phase raises IndexError here -- confirm
            # that the inputs guarantee a match.
            match_path = phase[4].replace('all_states/', '')
            mp_ids = []
            for path, ids in mp_dup.items():
                mp_path = path.replace(
                    '/Users/patrick/Downloads/20160710_MPContrib_MnO2_DK/',
                    '').replace('/3.double_relax/CONTCAR', '')
                if match_path == mp_path:
                    mp_ids.extend(ids)
            for path, id_dat in mp_cmp.items():
                mp_path = path.replace(
                    '20160725_MnO2_DK_Cifs/20160710_MPContrib_MnO2_DK-',
                    '').replace('-3.double_relax-CONTCAR.cif',
                                '').replace('-', '/')
                if match_path == mp_path:
                    if 'mp_id' in id_dat:
                        mp_ids.append(id_dat['mp_id'])

            phase.append(mp_ids)

    ###########################################################################
    # For structures that have mp-ids, add them to the contribution
    # dictionary. For those that don't, run a separate dictionary to keep
    # track of them
    ###########################################################################
    no_id_dict = {}
    for framework, fdat in mp_contrib_phases.items():
        for phase in fdat:
            d = {
                "Phase": framework,
                "Formula": phase[0],
                "dHf": '{} eV/mol'.format(phase[1]),
                "dHh": '{} eV/mol'.format(phase[3]),
                "GS": phase[2]
            }
            if len(phase[6]) == 0:
                no_id_dict[phase[4].replace('all_states/', '')] = d
            for mpid in phase[6]:
                mpfile.add_hierarchical_data(mpid, d)
                if include_cifs:
                    try:
                        mpfile.add_structure(phase[5], identifier=mpid)
                        print('%s %s added to %s'
                              % (framework, phase[0], mpid))
                    except ValueError as ex:
                        print('tried to add structure to %s but %s'
                              % (mpid, str(ex)))

    return 'DONE. {} do not have mp-ids!'.format(len(no_id_dict))