Exemplo n.º 1
0
 def test_stitch_xafs(self):
     """Exercise ``XAS.stitch`` in "XAFS" mode on the k_xanes / k_exafs fixtures."""
     # An unknown mode string must be rejected.
     with self.assertRaises(ValueError):
         XAS.stitch(self.k_xanes, self.k_exafs, mode="invalid")

     stitched = XAS.stitch(self.k_xanes, self.k_exafs, mode="XAFS")
     self.assertIsInstance(stitched, XAS)
     self.assertEqual("XAFS", stitched.spectrum_type)
     self.assertEqual(len(stitched.x), 500)
     self.assertAlmostEqual(min(stitched.x), min(self.k_xanes.x), 2)
     self.assertAlmostEqual(max(stitched.y), max(self.k_xanes.y), 2)

     # The x position of the steepest dy/dx must match k_xanes.e0.
     slope = np.gradient(stitched.y) / np.gradient(stitched.x)
     self.assertAlmostEqual(stitched.x[np.argmax(slope)],
                            self.k_xanes.e0, 2)

     # Stitching the k_xanes fixture with the l2_xanes fixture is an error.
     with self.assertRaises(ValueError):
         XAS.stitch(self.k_xanes, self.l2_xanes, mode="XAFS")

     # An all-zero x grid on the first spectrum is an error (default mode).
     self.k_xanes.x = np.zeros(100)
     with self.assertRaises(ValueError):
         XAS.stitch(self.k_xanes, self.k_exafs)

     # Changing the absorbing element of one spectrum is an error as well.
     self.k_xanes.absorbing_element = Element("Pt")
     with self.assertRaises(ValueError):
         XAS.stitch(self.k_xanes, self.k_exafs, mode="XAFS")
Exemplo n.º 2
0
    def test(self):
        """Check generated surface folders, the c-axis growth and the elongation folders."""
        # Directory names of the perturbed surfaces must match the expected list.
        found_surfs = sorted(
            path.split('/')[-1]
            for path in glob.glob("POSCAR.01x01x01/01.scale_pert/surf*"))
        self.assertEqual(found_surfs, self.surfs)

        # For every placed POSCAR, compare its c lattice length with the
        # reference surface POSCAR plus twice the first element's atomic radius.
        placed = glob.glob("POSCAR.01x01x01/00.place_ele/surf*/sys*/POSCAR")
        for placed_poscar in placed:
            surf_name = placed_poscar.split('/')[-3]
            reference = Structure.from_file(surf_name + '.POSCAR')
            generated = Structure.from_file(placed_poscar)
            first_element = Element(self.jdata['elements'][0])
            vacuum_size = float(first_element.atomic_radius * 2)
            delta = reference.lattice.c + vacuum_size - generated.lattice.c
            self.assertTrue(delta < 0.01)

        # Each surface must contain the expected elongation sub-folders.
        for surf in self.surfs:
            pattern = ("POSCAR.01x01x01/01.scale_pert/" + surf +
                       "/sys-*/scale-1.000/el*")
            elongs = sorted(path.split('/')[-1] for path in glob.glob(pattern))
            self.assertEqual(elongs, self.elongs)
Exemplo n.º 3
0
    def test_structure_to_oxidstructure(self):
        """Check oxidation-state decoration of a structure column, with override and in-place modes."""
        lattice = Lattice([[4.209, 0, 0], [0, 4.209, 0], [0, 0, 4.209]])
        cscl = Structure(lattice, ["Cl", "Cs"],
                         [[0.45, 0.5, 0.5], [0, 0, 0]])
        df = DataFrame(data={'structure': [cscl]})

        # Default call: Cl is decorated with -1 and Cs with +1.
        df["struct_oxid"] = structure_to_oxidstructure(df["structure"])
        decorated = df["struct_oxid"].tolist()[0]
        self.assertEqual(decorated[0].specie.oxi_state, -1)
        self.assertEqual(decorated[1].specie.oxi_state, +1)

        # Explicit override of the candidate oxidation states.
        override = {"Cl": [-2], "Cs": [+2]}
        df["struct_oxid2"] = structure_to_oxidstructure(
            df["structure"], oxi_states_override=override)
        overridden = df["struct_oxid2"].tolist()[0]
        self.assertEqual(overridden[0].specie.oxi_state, -2)
        self.assertEqual(overridden[1].specie.oxi_state, +2)

        # The source column still holds the undecorated element.
        original_first_site = df["structure"].tolist()[0][0]
        self.assertEqual(original_first_site.specie, Element("Cl"))

        # With inplace=True the source column itself is decorated.
        structure_to_oxidstructure(df["structure"], inplace=True)
        inplace_first_site = df["structure"].tolist()[0][0]
        self.assertEqual(inplace_first_site.specie.oxi_state, -1)
Exemplo n.º 4
0
    def _compute_form_en(self):
        """
        Compute the formation energy of every defect held by the analyzer.

        For each defect the stored value is::

            E(defect) - E(bulk) + sum_i (n_i(bulk) - n_i(defect)) * mu_i
                + charge * e_vbm + charge_correction + other_correction

        The results are written, in defect order, to
        ``self._formation_energies``.
        """
        bulk = self._entry_bulk
        self._formation_energies = energies = []
        for defect in self._defects:
            defect_comp = defect.entry.composition

            # Number of atoms of each element that must be exchanged with the
            # reservoir to turn the defect cell into the bulk cell.
            coeffs = {
                Element(elt): bulk.composition[elt] - defect_comp[elt]
                for elt in defect_comp.elements
            }

            mu_term = 0.0
            for elt, coeff in coeffs.items():
                mu_term += coeff * self._mu_elts[elt]

            energies.append(
                defect.entry.energy - bulk.energy +
                mu_term + defect.charge*self._e_vbm +
                defect.charge_correction + defect.other_correction)
Exemplo n.º 5
0
    def test_magnetic_properties(self):
        """Check collinearity/magnetism flags, the ordering and magnetic-site counts."""
        # The GdB4 fixture must not be reported as collinear.
        gdb4_analyzer = CollinearMagneticStructureAnalyzer(self.GdB4)
        self.assertFalse(gdb4_analyzer.is_collinear)

        # Before a 'magmom' site property is added here, the Fe fixture
        # must be reported as non-magnetic ...
        bare_fe_analyzer = CollinearMagneticStructureAnalyzer(self.Fe)
        self.assertFalse(bare_fe_analyzer.is_magnetic)

        # ... and afterwards as a collinear, ferromagnetically ordered magnet.
        self.Fe.add_site_property('magmom', [5])
        fe_analyzer = CollinearMagneticStructureAnalyzer(self.Fe)
        self.assertTrue(fe_analyzer.is_magnetic)
        self.assertTrue(fe_analyzer.is_collinear)
        self.assertEqual(fe_analyzer.ordering, Ordering.FM)

        nio_analyzer = CollinearMagneticStructureAnalyzer(
            self.NiO,
            make_primitive=False,
            overwrite_magmom_mode="replace_all_if_undefined")
        self.assertEqual(nio_analyzer.number_of_magnetic_sites, 4)
        self.assertEqual(nio_analyzer.number_of_unique_magnetic_sites(), 1)
        self.assertEqual(nio_analyzer.types_of_magnetic_specie,
                         [Element('Ni')])
        self.assertEqual(nio_analyzer.get_exchange_group_info(),
                         ('Fm-3m', 225))
Exemplo n.º 6
0
def gen_iupac_ordering():
    """
    Assign an "IUPAC ordering" rank to every element of periodic_table.json.

    The table is loaded with ``loadfn``, any existing "IUPAC ordering" key is
    removed from each element entry, and a new rank is stored under that key.
    The rank of an element is its position in the flattened sequence of
    (group, row) pairs built from ``blocks`` below. Nothing is written back
    to disk or returned by this function.
    """
    periodic_table = loadfn("periodic_table.json")

    # Each item is (groups, rows); the cartesian product of the two gives the
    # (group, row) pairs of that block, in ranking order.
    blocks = [
        ([18], range(6, 0, -1)),  # noble gasses
        ([1], range(7, 1, -1)),  # alkali metals
        ([2], range(7, 1, -1)),  # alkali earth metals
        (range(17, 2, -1), [9]),  # actinides
        (range(17, 2, -1), [8]),  # lanthanides
        ([3], (5, 4)),  # Y, Sc
        ([4], (6, 5, 4)),  # Hf -> Ti
        ([5], (6, 5, 4)),  # Ta -> V
        ([6], (6, 5, 4)),  # W -> Cr
        ([7], (6, 5, 4)),  # Re -> Mn
        ([8], (6, 5, 4)),  # Os -> Fe
        ([9], (6, 5, 4)),  # Ir -> Co
        ([10], (6, 5, 4)),  # Pt -> Ni
        ([11], (6, 5, 4)),  # Au -> Cu
        ([12], (6, 5, 4)),  # Hg -> Zn
        ([13], range(6, 1, -1)),  # Tl -> B
        ([14], range(6, 1, -1)),  # Pb -> C
        ([15], range(6, 1, -1)),  # Bi -> N
        ([1], [1]),  # Hydrogen
        ([16], range(6, 1, -1)),  # Po -> O
        ([17], range(6, 1, -1)),  # At -> F
    ]

    # Flatten the blocks into one ordered list of (group, row) pairs.
    pairs = [
        pair
        for groups, rows in blocks
        for pair in product(groups, rows)
    ]
    iupac_ordering_dict = {
        Element.from_row_and_group(row, group): rank
        for rank, (group, row) in enumerate(pairs)
    }

    # First strip any IUPAC ordering already present in the table.
    for properties in periodic_table.values():
        properties.pop("IUPAC ordering", None)

    # Then store the freshly computed rank for every element.
    for symbol, properties in periodic_table.items():
        if "IUPAC ordering" in properties:
            # sanity check that we don't cover the same element twice
            raise KeyError(f"IUPAC ordering already exists for {symbol}")

        properties["IUPAC ordering"] = iupac_ordering_dict[get_el_sp(symbol)]
Exemplo n.º 7
0
 def __init__(self,
              cutoff: float,
              twojmax: int,
              element_profile: Dict,
              quadratic: bool = False,
              pot_fit: bool = False,
              include_stress: bool = False,
              feature_batch: str = "pandas_concat",
              **kwargs):
     """
     Set up the bispectrum calculator and store the describer settings.

     Args:
         cutoff (float): The cutoff distance.
         twojmax (int): Band limit for bispectrum components.
         element_profile (dict): Parameters (cutoff factor 'r' and weight 'w')
             related to each element, e.g.,
             {'Na': {'r': 0.3, 'w': 0.9},
              'Cl': {'r': 0.7, 'w': 3.0}}
         quadratic (bool): Whether including quadratic terms.
             Default to False.
         pot_fit (bool): Whether combine the dataframe for potential fitting.
         include_stress (bool): Whether to include stress components.
         feature_batch (str): way to batch together a list of features
         **kwargs: keyword args to specify memory, verbose, and n_jobs
     """
     # Imported here rather than at module level, as in the original code.
     from maml.apps.pes import SpectralNeighborAnalysis

     calculator_settings = dict(rcut=cutoff,
                                twojmax=twojmax,
                                element_profile=element_profile,
                                quadratic=quadratic)
     self.calculator = SpectralNeighborAnalysis(**calculator_settings)

     self.rcutfac = cutoff
     self.twojmax = twojmax
     # Element symbols sorted by the ordering of their Element objects.
     self.elements = sorted(element_profile, key=Element)
     self.element_profile = element_profile
     self.quadratic = quadratic
     self.pot_fit = pot_fit
     self.include_stress = include_stress
     super().__init__(feature_batch=feature_batch, **kwargs)
Exemplo n.º 8
0
    def _make_local_species_info(self):
        """
        Collect display data for the sites lying within ``self.cutoff``.

        Resets and fills ``_species``, ``_initial_coords``, ``_final_coords``,
        ``_site_info``, ``_vectors`` and ``_vector_colors``. Sites whose
        displacement entry is falsy, or whose distance from the defect is not
        below the cutoff, are skipped. Kept sites are numbered from 1 and
        ``_vectors`` is keyed by that number.
        """
        self._species, self._initial_coords, self._final_coords = [], [], []
        self._site_info, self._vectors, self._vector_colors = [], {}, []

        kept_idx = 1
        for site, disp in zip(self.defect_structure, self.displacements):
            if not (disp and disp.distance_from_defect < self.cutoff):
                continue

            self._species.append(Element(disp.specie))
            self._initial_coords.append(disp.original_pos)
            self._final_coords.append(site.frac_coords)
            label_parts = [f"{round(disp.distance_from_defect, 1)}"]

            if disp and disp.displace_distance > self.min_displace_w_arrows:
                # Stored as a lazy generator of the scaled vector components.
                self._vectors[kept_idx] = \
                    (v * self.arrow_factor for v in disp.disp_vector)
                # max=200 instead of 255 since (255, 255, 255) is white.
                shade = int(200 * disp.angle / 180)
                self._vector_colors.append((shade, shade, 100))
                label_parts.append(f"{round(disp.displace_distance, 2)}")

            self._site_info.append("_".join(label_parts))
            kept_idx += 1
Exemplo n.º 9
0
    def _setup(self):
        """
        Write the input script ``in.sna`` and return its file name.

        Every command in ``self._CMDS`` that starts with ``compute`` gets the
        shared argument string (twojmax, per-element cutoffs and weights,
        rmin0 and the quadratic flag) appended; commands 2-4 additionally get
        ``bzeroflag 0``. A ``dump_modify`` line listing the elements is added
        and the result is spliced in before the last common command.
        """
        ordered_elements = sorted(self.element_profile.keys(), key=Element)
        cutoffs = [self.element_profile[el]['r'] * self.rcutfac
                   for el in ordered_elements]
        weights = [self.element_profile[el]['w'] for el in ordered_elements]

        compute_args = '1 0.99363 {} '.format(self.twojmax)
        compute_args += ' '.join(str(value) for value in cutoffs + weights)
        compute_args += ' rmin0 0 quadraticflag {}'.format(int(self.quadratic))

        commands = [
            cmd + compute_args if cmd.startswith('compute') else cmd
            for cmd in self._CMDS
        ]
        for position in (2, 3, 4):
            commands[position] += ' bzeroflag 0'
        commands.append('dump_modify 1 element ' +
                        ' '.join(str(el) for el in ordered_elements))

        # Insert the generated commands just before the final common command.
        all_commands = self._COMMON_CMDS[:]
        all_commands[-1:-1] = commands

        input_file = 'in.sna'
        with open(input_file, 'w') as handle:
            handle.write(_pretty_input(all_commands))
        return input_file
Exemplo n.º 10
0
    def _create_dict_for_feature_table(
            picklefile: Union[str, Path]) -> List[dict]:
        """Turn a pickled feature mapping into one record per metal site.

        Emits a DeprecationWarning on every call. For each key/value pair of
        the unpickled mapping, the feature list is extended with the element's
        number, row and group plus one random integer drawn from
        ``np.random.randint(1, 18)``. Records whose feature list contains a
        NaN are dropped.

        Arguments:
            picklefile (Union[str, Path]) -- path to pickle file

        Returns:
            List[dict] -- records with the keys "metal", "coords", "feature"
                and "name" (the stem of ``picklefile``)
        """
        warnings.warn(
            "This method will be removed in the next major release",
            DeprecationWarning,
        )
        sites = read_pickle(picklefile)

        records = []
        for metal, site in sites.items():
            element = Element(metal)
            encoding = [
                element.number,
                element.row,
                element.group,
                np.random.randint(1, 18),
            ]
            feature_vector = list(site["feature"]) + encoding
            record = dict(
                metal=metal,
                coords=site["coords"],
                feature=feature_vector,
                name=Path(picklefile).stem,
            )

            if np.isnan(np.array(feature_vector)).any():
                continue
            records.append(record)

        return records
Exemplo n.º 11
0
 def test_stitch_l23(self):
     """Exercise ``XAS.stitch`` in "L23" mode on the l2_xanes / l3_xanes fixtures."""
     # After overwriting the first l2 intensity, stitching must emit exactly
     # one UserWarning whose message mentions "jump".
     self.l2_xanes.y[0] = 0.1
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always")
         XAS.stitch(self.l2_xanes, self.l3_xanes, 100, mode="L23")
         self.assertEqual(len(caught), 1)
         last_warning = caught[-1]
         self.assertIs(last_warning.category, UserWarning)
         self.assertIn("jump", str(last_warning.message))

     # Restore the l2 spectrum from its dict form and stitch again.
     self.l2_xanes = XAS.from_dict(l2_xanes_dict)
     stitched = XAS.stitch(self.l2_xanes, self.l3_xanes, 100, mode="L23")
     self.assertIsInstance(stitched, XAS)
     self.assertEqual("L23", stitched.edge)
     self.assertAlmostEqual(min(stitched.x), min(self.l3_xanes.x), 3)
     self.assertAlmostEqual(max(stitched.x), max(self.l3_xanes.x), 3)
     self.assertTrue(np.greater_equal(stitched.y, self.l2_xanes.y).all())
     self.assertEqual(len(stitched.x), 100)

     # A changed spectrum type is rejected.
     self.l2_xanes.spectrum_type = "EXAFS"
     with self.assertRaises(ValueError):
         XAS.stitch(self.l2_xanes, self.l3_xanes, mode="L23")

     # A changed absorbing element is rejected.
     self.l2_xanes.absorbing_element = Element("Pt")
     with self.assertRaises(ValueError):
         XAS.stitch(self.l2_xanes, self.l3_xanes, mode="L23")

     # The k_xanes fixture cannot be stitched with l3_xanes in this mode.
     with self.assertRaises(ValueError):
         XAS.stitch(self.k_xanes, self.l3_xanes, mode="L23")
Exemplo n.º 12
0
    def _process_multielement_entries(self):
        """
        Create entries for multi-element Pourbaix construction.

        Every combination of 1..N unprocessed entries (N = number of elements
        in ``self._elt_comp``) is considered. Combinations that do not contain
        all required elements are skipped. A single entry is accepted only if
        its composition without O and H has the same reduced formula as the
        target composition. For larger combinations a linear system is solved
        for the entry weights (the first entry has weight 1.0) and the
        combination is kept only when all solved weights are positive.

        Returns:
            list of MultiEntry objects.

        Raises:
            Exception: if the linear solve fails with anything other than a
                singular-matrix error.
        """
        N = len(self._elt_comp)  # No. of elements
        entries = self._unprocessed_entries
        # Materialise the keys as a list: the elements are indexed by position
        # below, which a dict view does not support.
        el_list = list(self._elt_comp.keys())
        comp_list = [self._elt_comp[el] for el in el_list]
        required_elements = {Element(el) for el in el_list}

        list_of_entries = []
        for j in range(1, N + 1):
            list_of_entries += list(itertools.combinations(
                list(range(len(entries))), j))

        processed_entries = []
        for entry_list in list_of_entries:
            # Check if all elements in composition list are present in
            # entry_list
            present_elements = set(chain.from_iterable(
                entries[i].composition.keys() for i in entry_list))
            if not required_elements.issubset(present_elements):
                continue

            if len(entry_list) == 1:
                # If only one entry in entry_list, then check if the
                # composition matches with the set composition.
                entry = entries[entry_list[0]]
                non_oh = {
                    key: entry.composition[key]
                    for key in entry.composition.keys()
                    if key.symbol not in ["O", "H"]
                }
                # Both compositions are scaled by their smallest amount
                # before their reduced formulas are compared.
                target_min = min(self._elt_comp.values())
                target = Composition({
                    key: amount / target_min
                    for key, amount in self._elt_comp.items()
                })
                non_oh_min = min(non_oh.values())
                actual = Composition({
                    key: amount / non_oh_min
                    for key, amount in non_oh.items()
                })
                if target.reduced_formula == actual.reduced_formula:
                    processed_entries.append(MultiEntry([entry], [1.0]))
                continue

            n_unknowns = len(entry_list) - 1
            A = [[0.0] * n_unknowns for _ in range(n_unknowns)]
            multi_entries = [entries[j] for j in entry_list]
            entry0 = entries[entry_list[0]]
            comp0 = entry0.composition
            if entry0.phase_type == "Solid":
                red_fac = comp0.get_reduced_composition_and_factor()[1]
            else:
                red_fac = 1.0
            sum_nel = sum(comp0[el] / red_fac for el in el_list)
            b = [comp0[Element(el_list[i])] / red_fac - comp_list[i] * sum_nel
                 for i in range(1, len(entry_list))]
            for j in range(1, len(entry_list)):
                entry = entries[entry_list[j]]
                comp = entry.composition
                if entry.phase_type == "Solid":
                    red_fac = comp.get_reduced_composition_and_factor()[1]
                else:
                    red_fac = 1.0
                sum_nel = sum(comp[el] / red_fac for el in el_list)
                for i in range(1, len(entry_list)):
                    el = el_list[i]
                    A[i-1][j-1] = comp_list[i] * sum_nel -\
                        comp[Element(el)] / red_fac
            try:
                weights = np.linalg.solve(np.array(A), np.array(b))
            except np.linalg.LinAlgError as err:
                # Exceptions have no ``message`` attribute on Python 3;
                # inspect the string form instead.
                if 'Singular matrix' in str(err):
                    continue
                raise Exception("Unknown Error message!")
            if not np.all(weights > 0.0):
                continue
            weights = list(weights)
            weights.insert(0, 1.0)
            super_entry = MultiEntry(multi_entries, weights)
            processed_entries.append(super_entry)
        return processed_entries
Exemplo n.º 13
0
    def get_element_representation(name):
        """
        Build a 32-component subshell-occupancy vector for an element.

        The components correspond, in order, to the labels s1-s2, p1-p6,
        d1-d10 and f1-f14. A component is 1.0 when its label (subshell letter
        followed by the electron count) occurs in the element's electronic
        structure and 0.0 otherwise, so several components may be set.
        'H' and 'He' are hard-coded as 's1' and 's2'; for every other element
        the labels are extracted from ``Element(name).electronic_structure``.

        Parameters
        ----------
        name: string
            element symbol

        Returns
        -------
        numpy array of 32 floats
        """
        element = Element(name)

        # (subshell letter, maximum electron count) in output order.
        capacities = (('s', 2), ('p', 6), ('d', 10), ('f', 14))
        subshell_labels = [
            letter + str(count)
            for letter, capacity in capacities
            for count in range(1, capacity + 1)
        ]

        if name == 'H':
            occupied = {'s1'}
        elif name == 'He':
            occupied = {'s2'}
        else:
            matches = re.findall(r"\.\d(\w+)<sup>(\d+)</sup>",
                                 element.electronic_structure)
            occupied = {''.join(pair) for pair in matches}

        return np.array([
            1.0 if label in occupied else 0.0
            for label in subshell_labels
        ])
Exemplo n.º 14
0
    def from_file(cls, filename, atom_style=None):
        """
        Build an instance from a data file.

        The file is parsed and validated with the class helpers
        ``_parse_data_file`` and ``_validate_data_file``. The box bounds,
        masses, atoms, optional velocities and optional 'PairIJ Coeffs'
        sections are then converted into the constructor arguments.

        Args:
            filename: path passed on to ``cls._parse_data_file``.
            atom_style: accepted but not used anywhere in this method.

        Returns:
            An instance of ``cls``.

        Raises:
            ValueError: if ``cls._validate_data_file`` reports any errors.
        """
        description, headers, sections = cls._parse_data_file(filename)
        errors = cls._validate_data_file(headers, sections)
        if errors:
            raise ValueError('data file is invalid: {}'.format(errors))

        # Box bounds; the tilt factors default to zero when the header
        # 'xy xz yz' is absent.
        xlo, xhi = headers['xlo xhi']
        ylo, yhi = headers['ylo yhi']
        zlo, zhi = headers['zlo zhi']
        xy, xz, yz = headers.get('xy xz yz', [0, 0, 0])
        lammps_box = LammpsBox(xhi, yhi, zhi, xlo, ylo, zlo, xy, xz, yz)

        # Guess symbol from closest atomic mass (yes not great for isotopes)
        # Structured array with one (mass, symbol) record per entry of
        # periodic_table.
        elements = np.array([
            tuple([Element(element).atomic_mass,
                   Element(element)]) for element in periodic_table
        ],
                            dtype={
                                'names': ['mass', 'symbol'],
                                'formats': [np.float64, np.chararray]
                            })
        symbol_indicies = {}
        index_symbols = {}  # Makes element lookup quicker
        masses = {}
        for index, atomic_mass, in sections['Masses']['data']:
            # Pick the record whose mass is nearest to the listed mass.
            symbol = elements['symbol'][np.abs(elements['mass'] -
                                               atomic_mass).argmin()]
            symbol_indicies[Element(symbol)] = index
            index_symbols[index] = Element(symbol)
            masses[Element(symbol)] = atomic_mass

        # Default full format or use check format
        atoms = []
        sections['Atoms']['data'].sort(
            order='f0')  # f0 is default name of first field
        for atom in sections['Atoms']['data']:
            if sections['Atoms'].get('check') == 'full':
                # Fields f2..f6 are read as type, charge and three
                # position components.
                atom_type, charge, *position = atom['f2'], atom['f3'], atom[
                    'f4'], atom['f5'], atom['f6']
            else:
                # Otherwise the record is unpacked positionally; index and
                # mol are read but not used further.
                index, mol, atom_type, charge, *position = atom

            element = index_symbols[atom_type]
            atoms.append([element, charge, position])

        # Velocities are optional; None is passed on when the section is
        # missing.
        velocities = None
        if 'Velocities' in sections:
            sections['Velocities']['data'].sort(
                order='f0')  # f0 is default name of first field
            velocities = [[vx, vy, vz] for vx, vy, vz in sections['Velocities']
                          ['data'][['f1', 'f2', 'f3']]]

        # Get Potentials
        # TODO only gets pair potentials for now and no reason to keep str
        pair_potentials = {}
        if 'PairIJ Coeffs' in sections:
            # Key: pair of elements; value: the remaining parameters joined
            # into one space-separated string.
            for s1, s2, *parameters in sections['PairIJ Coeffs']['data']:
                pair_potentials[(index_symbols[s1],
                                 index_symbols[s2])] = ' '.join(
                                     list(map(str, parameters)))

        potentials = LammpsPotentials(pair_potentials, symbol_indicies)

        return cls(description,
                   symbol_indicies,
                   masses,
                   atoms,
                   lammps_box,
                   potentials=potentials,
                   velocities=velocities)
Exemplo n.º 15
0
    def _parse(self, filename):
        """
        Parse a Gaussian output file and populate this object's attributes.

        The file is read line by line in three stages:

        * stage 0 - collect link0 entries until the start marker or the route
          line is found (the route line sets ``functional``, ``basis_set``,
          ``route`` and ``dieze_tag``);
        * stage 1 - wait for the charge / multiplicity line (sets ``charge``
          and ``spin_mult``);
        * stage 2 - collect everything else: ``structures``, ``energies``,
          ``corrections``, ``cart_forces``, ``frequencies``,
          ``Mulliken_charges``, ``errors``, ``num_basis_func``, ``is_pcm`` /
          ``pcm``, ``stationary_type`` and ``properly_terminated``.

        A warning is issued when no "Normal termination" line was seen.

        Args:
            filename: path of the output file, opened with ``zopen``.
        """
        # Raw strings keep the regex escapes (\s, \d, \(, ...) from being
        # treated as invalid string escapes.
        start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(r" #[pPnNtT]*.*")
        link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
        charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                     r"Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
        pcm_patt = re.compile(r"Polarizable Continuum Model")
        stat_type_patt = re.compile(r"imaginary frequencies")
        scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile(r"(Normal|Error) termination")
        error_patt = re.compile(
            r"(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile(
            r"^\s*(Mulliken charges|Mulliken atomic charges)")
        mulliken_charge_patt = re.compile(
            r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
        end_mulliken_patt = re.compile(
            r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
        std_orientation_patt = re.compile(r"Standard orientation")
        end_patt = re.compile(r"--+")
        orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                                 r"\s+([\d\.-]+)")
        forces_on_patt = re.compile(
            r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
        forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
        forces_patt = re.compile(
            r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

        freq_on_patt = re.compile(
            r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
        freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
        normal_mode_patt = re.compile(
            r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

        # Messages recorded in self.errors for each recognised error marker.
        error_defs = {
            "! Non-Optimized Parameters !": "Optimization error",
            "Convergence failure": "SCF convergence error"
        }

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}
        self.link0 = {}
        self.cart_forces = []
        self.frequencies = []

        coord_txt = []
        read_coord = 0
        read_mulliken = False
        mulliken_txt = []
        orbitals_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False
        parse_forces = False
        forces = []
        parse_freq = False
        frequencies = []

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif link0_patt.match(line):
                        m = link0_patt.match(line)
                        self.link0[m.group(1)] = m.group(2)
                    elif route_patt.search(line):
                        params = read_route_line(line)
                        self.functional = params[0]
                        self.basis_set = params[1]
                        self.route = params[2]
                        self.dieze_tag = params[3]
                        parse_stage = 1
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            # NOTE: str.strip removes the characters ' ', 't'
                            # and 'o' from both ends, not the literal prefix
                            # " to "; kept as-is so existing keys do not change.
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            # read_coord counts the dashed separator lines and
                            # wraps back to 0 after the third one, at which
                            # point the buffered rows are converted.
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                # The first two buffered lines are skipped.
                                for coord_line in coord_txt[2:]:
                                    toks = coord_line.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append([float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))

                    if parse_forces:
                        m = forces_patt.search(line)
                        if m:
                            forces.extend([float(_v) for _v in m.groups()[2:5]])
                        elif forces_off_patt.search(line):
                            self.cart_forces.append(forces)
                            forces = []
                            parse_forces = False

                    elif parse_freq:
                        m = freq_patt.search(line)
                        if m:
                            values = [float(_v) for _v in m.groups()[0].split()]
                            for value in values:
                                frequencies.append([value, []])
                        elif normal_mode_patt.search(line):
                            values = [float(_v) for _v in line.split()[2:]]
                            # Each triple of values is appended to one of the
                            # last n frequency entries, in order.
                            n = len(values) // 3
                            for i in range(0, len(values), 3):
                                j = -n + i // 3
                                frequencies[j][1].extend(values[i:i+3])
                        elif line.find("-------------------") != -1:
                            parse_freq = False
                            self.frequencies.append(frequencies)
                            frequencies = []

                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                            terminated = True
                    elif error_patt.search(line):
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and \
                            num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and \
                            stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        # The exponent marker 'D' is rewritten to 'E' so that
                        # float() can parse the number.
                        self.energies.append(float(m.group(1).replace("D",
                                                                      "E")))
                    elif oniom_patt.search(line):
                        # Compiled patterns have no ``matcher`` method; the
                        # original call raised AttributeError on ONIOM lines.
                        m = oniom_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif orbital_patt.search(line):
                        orbitals_txt.append(line)
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = True
                    elif not parse_forces and forces_on_patt.search(line):
                        parse_forces = True
                    elif freq_on_patt.search(line):
                        parse_freq = True

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            # Summary line reached: convert the buffered rows
                            # into {atom index: [symbol, charge]}.
                            mulliken_charges = {}
                            for charge_line in mulliken_txt:
                                m = mulliken_charge_patt.search(charge_line)
                                if m:
                                    mulliken_charges[int(m.group(1))] = [
                                        m.group(2), float(m.group(3))]
                            read_mulliken = False
                            self.Mulliken_charges = mulliken_charges

        if not terminated:
            #raise IOError("Bad Gaussian output file.")
            warnings.warn("\n" + self.filename + \
                ": Termination error or bad Gaussian output file !")
    return (e_above_hull)


# Element whose table directory is processed below.
key_element = 'Li'
tieline_dataframe = pd.read_csv(
    'Tables/{element}/tieline_distinct.csv'.format(element=key_element))

# Drop every row whose formula contains a noble-gas element: the mask is True
# only when no element of the formula reports is_noble_gas.
boolean_gas = tieline_dataframe.pretty_formula.apply(
    lambda x: True not in [e.is_noble_gas for e in Composition(x).elements])
no_nobel_gas_dataframe = tieline_dataframe[boolean_gas]

# Keep only the rows whose formula does NOT contain the key element, renumber
# them from zero and save them to a CSV file without the index column.
boolean_element = no_nobel_gas_dataframe.pretty_formula.apply(
    lambda x: Element(key_element) not in Composition(x).elements)
vanishing_solubility_phases_dataframe = no_nobel_gas_dataframe[boolean_element]
vanishing_solubility_phases_dataframe.reset_index(drop=True, inplace=True)
vanishing_solubility_phases_dataframe.to_csv(
    'Tables/{element}/tieline_without_solubility_and_gas.csv'.format(
        element=key_element),
    index=False)

# Material ids of the remaining rows, used by the query that follows
# (original note: pick up those phases with band gap >= 3eV).
material_id_list = vanishing_solubility_phases_dataframe[
    'material_id'].to_list()
with MPRester(api_key='') as mpr:
    # entry id is an alias of task id
    candidates = mpr.query(criteria={
        'task_id': {
            '$in': material_id_list
Exemplo n.º 17
0
 def test_get_plot_form_energy(self):
     """Check that the formation-energy plot can be saved to a PDF file."""
     mu_elts = {Element('As'): 0, Element('Ga'): 0}
     self.dp.get_plot_form_energy(mu_elts).savefig('test.pdf')
     self.assertTrue(os.path.exists('test.pdf'))
     # Remove the artefact with the standard library rather than shelling
     # out to `rm`, which is not portable and hides failures.
     os.remove('test.pdf')
Exemplo n.º 18
0
 def test_get_oxidation(self):
     """Check Nd oxidation states with and without use_common_oxi_states."""
     source = self.data_source

     # State of the data source as configured by the fixture.
     common_states = source.get_oxidation_states(Element("Nd"))
     self.assertEqual((3, ), common_states)

     # After switching the flag off a second state is reported as well.
     source.use_common_oxi_states = False
     all_states = source.get_oxidation_states(Element("Nd"))
     self.assertEqual((2, 3), all_states)
Exemplo n.º 19
0
    def _parse(self, filename):
        """
        Parse a Gaussian output file and store the extracted data on this object.

        The file is read line by line through three stages: stage 0 collects
        the link0 and route sections, stage 1 looks for the charge and spin
        multiplicity, and stage 2 gathers everything else (energies,
        structures, forces, frequencies, orbital eigenvalues and
        coefficients, Mulliken charges, errors and termination status).

        A warning is emitted if no "Normal termination" line is found.

        Args:
            filename (str): Path of the Gaussian output file; opened with zopen.
        """
        # Raw strings keep the regex escapes (\s, \d, \( ...) out of Python's
        # own string-escape processing.
        start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(r" #[pPnNtT]*.*")
        link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
        charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
        num_elec_patt = re.compile(r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
        pcm_patt = re.compile(r"Polarizable Continuum Model")
        stat_type_patt = re.compile(r"imaginary frequencies")
        scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile(r"(Normal|Error) termination")
        error_patt = re.compile(r"(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile(r"^\s*(Mulliken charges|Mulliken atomic charges)")
        mulliken_charge_patt = re.compile(r"^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)")
        end_mulliken_patt = re.compile(r"(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)")
        std_orientation_patt = re.compile(r"Standard orientation")
        end_patt = re.compile(r"--+")
        orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)=\s+([\d\.-]+)")
        forces_on_patt = re.compile(r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
        forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
        forces_patt = re.compile(r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

        freq_on_patt = re.compile(r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
        freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
        normal_mode_patt = re.compile(r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

        mo_coeff_patt = re.compile(r"Molecular Orbital Coefficients:")
        mo_coeff_name_patt = re.compile(r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

        # Maps the text captured by error_patt to the message stored in self.errors.
        error_defs = {
            "! Non-Optimized Parameters !": "Optimization error",
            "Convergence failure": "SCF convergence error",
        }

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}
        self.link0 = {}
        self.cart_forces = []
        self.frequencies = []
        self.eigenvalues = []
        self.is_spin = False

        coord_txt = []
        # read_coord counts the dashed separator lines seen since a
        # "Standard orientation" header; 0 means "not reading coordinates".
        read_coord = 0
        read_mulliken = False
        read_eigen = False
        eigen_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False
        parse_forces = False
        forces = []
        parse_freq = False
        frequencies = []
        read_mo = False

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif link0_patt.match(line):
                        m = link0_patt.match(line)
                        self.link0[m.group(1)] = m.group(2)
                    elif route_patt.search(line):
                        params = read_route_line(line)
                        self.functional = params[0]
                        self.basis_set = params[1]
                        self.route = params[2]
                        self.dieze_tag = params[3]
                        parse_stage = 1
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            # NOTE(review): str.strip(" to ") removes any of the
                            # characters ' ', 't', 'o' from both ends, not the
                            # literal prefix " to "; kept as is so existing
                            # keys do not change.
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            # The coordinate table is closed by the third
                            # separator line after the header.
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                # The first two collected lines are column headers.
                                for l in coord_txt[2:]:
                                    toks = l.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append([float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))

                    if parse_forces:
                        m = forces_patt.search(line)
                        if m:
                            forces.extend([float(_v) for _v in m.groups()[2:5]])
                        elif forces_off_patt.search(line):
                            self.cart_forces.append(forces)
                            forces = []
                            parse_forces = False

                    #  read molecular orbital eigenvalues
                    if read_eigen:
                        m = orbital_patt.search(line)
                        if m:
                            eigen_txt.append(line)
                        else:
                            read_eigen = False
                            self.eigenvalues = {Spin.up: []}
                            for eigenline in eigen_txt:
                                if "Alpha" in eigenline:
                                    self.eigenvalues[Spin.up] += [float(e) for e in float_patt.findall(eigenline)]
                                elif "Beta" in eigenline:
                                    if Spin.down not in self.eigenvalues:
                                        self.eigenvalues[Spin.down] = []
                                    self.eigenvalues[Spin.down] += [float(e) for e in float_patt.findall(eigenline)]
                            eigen_txt = []

                    # read molecular orbital coefficients
                    if read_mo:
                        # build a matrix with all coefficients
                        all_spin = [Spin.up]
                        if self.is_spin:
                            all_spin.append(Spin.down)

                        mat_mo = {}
                        for spin in all_spin:
                            mat_mo[spin] = np.zeros((self.num_basis_func, self.num_basis_func))
                            nMO = 0
                            end_mo = False
                            while nMO < self.num_basis_func and not end_mo:
                                # skip the two header lines of each column block
                                f.readline()
                                f.readline()
                                self.atom_basis_labels = []
                                for i in range(self.num_basis_func):
                                    line = f.readline()

                                    # identify atom and OA labels
                                    m = mo_coeff_name_patt.search(line)
                                    if m.group(1).strip() != "":
                                        iat = int(m.group(2)) - 1
                                        # atname = m.group(3)
                                        self.atom_basis_labels.append([m.group(4)])
                                    else:
                                        self.atom_basis_labels[iat].append(m.group(4))

                                    #  MO coefficients
                                    coeffs = [float(c) for c in float_patt.findall(line)]
                                    for j in range(len(coeffs)):
                                        mat_mo[spin][i, nMO + j] = coeffs[j]

                                nMO += len(coeffs)
                                line = f.readline()
                                # manage pop=regular case (not all MO)
                                if nMO < self.num_basis_func and (
                                    "Density Matrix:" in line or mo_coeff_patt.search(line)
                                ):
                                    end_mo = True
                                    warnings.warn("POP=regular case, matrix coefficients not complete")
                            f.readline()

                        self.eigenvectors = mat_mo
                        read_mo = False

                        # build a more convenient array dict with MO coefficient of
                        # each atom in each MO.
                        # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                        mo = {}
                        for spin in all_spin:
                            mo[spin] = [
                                [{} for iat in range(len(self.atom_basis_labels))] for j in range(self.num_basis_func)
                            ]
                            for j in range(self.num_basis_func):
                                i = 0
                                for iat in range(len(self.atom_basis_labels)):
                                    for label in self.atom_basis_labels[iat]:
                                        mo[spin][j][iat][label] = self.eigenvectors[spin][i, j]
                                        i += 1

                        self.molecular_orbital = mo

                    elif parse_freq:
                        m = freq_patt.search(line)
                        if m:
                            values = [float(_v) for _v in m.groups()[0].split()]
                            for value in values:
                                frequencies.append([value, []])
                        elif normal_mode_patt.search(line):
                            values = [float(_v) for _v in line.split()[2:]]
                            n = int(len(values) / 3)
                            # Each triple of values is appended to one of the
                            # last n frequency entries, in order.
                            for i in range(0, len(values), 3):
                                j = -n + int(i / 3)
                                frequencies[j][1].extend(values[i : i + 3])
                        elif line.find("-------------------") != -1:
                            parse_freq = False
                            self.frequencies.append(frequencies)
                            frequencies = []

                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                            terminated = True
                    elif error_patt.search(line):
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif num_elec_patt.search(line):
                        m = num_elec_patt.search(line)
                        self.electrons = (int(m.group(1)), int(m.group(2)))
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        # Fortran-style exponents ("D") are converted for float().
                        self.energies.append(float(m.group(1).replace("D", "E")))
                    elif oniom_patt.search(line):
                        # Compiled patterns have no `matcher` method; use search.
                        m = oniom_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif not read_eigen and orbital_patt.search(line):
                        eigen_txt.append(line)
                        read_eigen = True
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = True
                    elif not parse_forces and forces_on_patt.search(line):
                        parse_forces = True
                    elif freq_on_patt.search(line):
                        parse_freq = True
                    elif mo_coeff_patt.search(line):
                        if "Alpha" in line:
                            self.is_spin = True
                        read_mo = True

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            # End of the Mulliken block: turn the buffered lines
                            # into {atom index: [symbol, charge]}.
                            mulliken_charges = {}
                            for charge_line in mulliken_txt:
                                m = mulliken_charge_patt.search(charge_line)
                                if m:
                                    mulliken_charges[int(m.group(1))] = [m.group(2), float(m.group(3))]
                            read_mulliken = False
                            self.Mulliken_charges = mulliken_charges

        if not terminated:
            # raise IOError("Bad Gaussian output file.")
            warnings.warn("\n" + self.filename + ": Termination error or bad Gaussian output file !")
Exemplo n.º 20
0
    def test_electronegativity(self):
        sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5)

        s1 = Structure.from_file(os.path.join(test_dir, "Na2Fe2PAsO4S4.json"))
        s2 = Structure.from_file(os.path.join(test_dir, "Na2Fe2PNO4Se4.json"))
        self.assertEqual(
            sm.get_best_electronegativity_anonymous_mapping(s1, s2), {
                Element('S'): Element('Se'),
                Element('As'): Element('N'),
                Element('Fe'): Element('Fe'),
                Element('Na'): Element('Na'),
                Element('P'): Element('P'),
                Element('O'): Element('O'),
            })
        self.assertEqual(len(sm.get_all_anonymous_mappings(s1, s2)), 2)
Exemplo n.º 21
0
    def _parse(self, filename):
        """
        Parse a Gaussian output file and store the extracted data on this object.

        The file is read line by line through three stages: stage 0 collects
        the route section, stage 1 looks for the charge and spin
        multiplicity, and stage 2 gathers energies, structures, thermal
        corrections, PCM data and the termination status.

        Args:
            filename (str): Path of the Gaussian output file; opened with zopen.

        Raises:
            IOError: If no termination line is found in the file.
        """
        # Raw strings keep the regex escapes (\s, \d, \( ...) out of Python's
        # own string-escape processing.
        start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(r" #[pPnNtT]*.*")
        charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
        pcm_patt = re.compile(r"Polarizable Continuum Model")
        stat_type_patt = re.compile(r"imaginary frequencies")
        scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile(r"(Normal|Error) termination of Gaussian")
        std_orientation_patt = re.compile(r"Standard orientation")
        end_patt = re.compile(r"--+")
        orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)=\s+([\d\.-]+)")
        functional_basis_patt = re.compile(r"(\w+)/([^/]+)")

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None

        coord_txt = []
        # read_coord counts the dashed separator lines seen since a
        # "Standard orientation" header; 0 means "not reading coordinates".
        read_coord = 0
        orbitals_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif route_patt.search(line):
                        # Each whitespace-separated token becomes an upper-case
                        # key; "key=value" tokens also store the value.
                        self.route = {}
                        for tok in line.split():
                            sub_tok = tok.strip().split("=")
                            key = sub_tok[0].upper()
                            self.route[key] = sub_tok[1].upper() if len(sub_tok) > 1 else ""
                            m = functional_basis_patt.match(key)
                            if m:
                                self.functional = m.group(1)
                                self.basis_set = m.group(2)
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            # NOTE(review): str.strip(" to ") removes any of the
                            # characters ' ', 't', 'o' from both ends, not the
                            # literal prefix " to "; kept as is so existing
                            # keys do not change.
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            # The coordinate table is closed by the third
                            # separator line after the header.
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                # The first two collected lines are column headers.
                                for l in coord_txt[2:]:
                                    toks = l.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    # A real list is required: on Python 3
                                    # map() returns a lazy iterator.
                                    coords.append([float(tok) for tok in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))
                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                        terminated = True
                    elif (not num_basis_found) and num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        # Fortran-style exponents ("D") are converted for float().
                        self.energies.append(float(m.group(1).replace("D", "E")))
                    elif oniom_patt.search(line):
                        # Compiled patterns have no `matcher` method; use search.
                        m = oniom_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif orbital_patt.search(line):
                        orbitals_txt.append(line)
        if not terminated:
            raise IOError("Bad Gaussian output file.")
Exemplo n.º 22
0
    def train(self,
              train_structures,
              train_energies,
              train_forces,
              train_stresses=None,
              **kwargs):
        """
        Train the neural network potential by running the n2p2 executables
        `nnp-scaling` and `nnp-train` in a scratch directory, then load the
        resulting RMSE values, weights and scaler onto this object.

        Args:
            train_structures ([Structure]): The list of Pymatgen Structure object.
            train_energies ([float]): List of total energies of each structure in
                structures list.
            train_forces ([np.array]): List of (m, 3) forces array of each structure
                with m atoms in structures list. m can be varied with each
                single structure case.
            train_stresses (list): List of (6, ) virial stresses of each
                structure in structures list.
            kwargs: Parameters in write_input method.

        Returns:
            int: Return code of `nnp-train` (a non-zero code raises instead).

        Raises:
            RuntimeError: If `nnp-train` is not found on the path, or if
                `nnp-scaling` or `nnp-train` exits with a non-zero code.
        """

        def _raise_on_failure(returncode, stderr_bytes):
            """Raise RuntimeError built from stderr if returncode is non-zero."""
            if returncode == 0:
                return
            error_msg = 'n2p2 exited with return code %d' % returncode
            msg = stderr_bytes.decode("utf-8").split('\n')[:-1]
            first_error = next(
                (i for i, m in enumerate(msg) if m.startswith('ERROR')), None)
            if first_error is not None:
                error_msg += ', '.join(msg[first_error:])
            else:
                error_msg += ', '
                # stderr may be empty; do not fail while reporting a failure.
                error_msg += msg[-1] if msg else ''
            raise RuntimeError(error_msg)

        if not which('nnp-train'):
            raise RuntimeError("NNP Trainer has not been found.")
        train_structures, train_forces, train_stresses = \
            check_structures_forces_stresses(train_structures, train_forces, train_stresses)
        train_pool = pool_from(train_structures, train_energies, train_forces,
                               train_stresses)
        atoms_filename = 'input.data'

        with ScratchDir('.'):
            self.write_cfgs(filename=atoms_filename, cfg_pool=train_pool)
            output = 'training_output'

            self.write_input(**kwargs)
            p_scaling = subprocess.Popen(['nnp-scaling', '100'],
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
            _, stderr = p_scaling.communicate()
            _raise_on_failure(p_scaling.returncode, stderr)

            # The training log goes to a file; the context manager makes sure
            # the handle is closed once the process has finished.
            with open(output, 'w') as log_file:
                p_train = subprocess.Popen(['nnp-train'],
                                           stdout=log_file,
                                           stderr=subprocess.PIPE)
                _, stderr = p_train.communicate()
            rc = p_train.returncode
            _raise_on_failure(rc, stderr)

            with zopen(output) as f:
                error_lines = f.read()

            energy_rmse_pattern = re.compile(
                r'ENERGY\s*\S*\s*(\S*)\s*(\S*).*?\n')
            forces_rmse_pattern = re.compile(
                r'FORCES\s*\S*\s*(\S*)\s*(\S*).*?\n')
            # The builtin float replaces the np.float alias, which was
            # removed from NumPy.
            errors = np.array(energy_rmse_pattern.findall(error_lines),
                              dtype=float).T.tolist()
            self.train_energy_rmse = errors[0]
            self.validation_energy_rmse = errors[1]

            errors = np.array(forces_rmse_pattern.findall(error_lines),
                              dtype=float).T.tolist()
            self.train_forces_rmse = errors[0]
            self.validation_forces_rmse = errors[1]

            for specie in self.elements:
                # Weight files are named by zero-padded atomic number and epoch.
                weights_filename = 'weights.{}.{}.out'.format(
                    str(Element(specie).number).zfill(3),
                    str(self.param['epochs']).zfill(6))
                self.weights[specie] = []
                self.bs[specie] = []
                self.weight_param[specie] = []
                self.load_weights(weights_filename, specie)
            self.load_scaler('scaling.data')

        return rc
Exemplo n.º 23
0
    def load_input(self, filename='input.nn'):
        """
        Load input file from trained Neural Network Potential.

        Sets self.elements, self.num_symm_functions, self.layer_sizes and
        self.param from the keyword lines of the file. Lines containing "#"
        are ignored.

        Args:
            filename (str): The input filename.
        """
        PARAMS = {
            'general': [
                'cutoff_type', 'scale_features', 'scale_min_short',
                'scale_max_short', 'hidden_layers'
            ],
            'additional': [
                'epochs', 'updater_type', 'parallel_mode', 'jacobian_mode',
                'update_strategy', 'selection_mode', 'task_batch_size_energy',
                'task_batch_size_force', 'random_seed', 'test_fraction',
                'force_weight', 'short_energy_fraction',
                'short_force_fraction', 'short_energy_error_threshold',
                'short_force_error_threshold', 'rmse_threshold_trials',
                'weights_min', 'weights_max', 'write_trainpoints',
                'write_trainforces', 'write_weights_epoch',
                'write_neuronstats', 'kalman_type', 'kalman_epsilon',
                'kalman_q0', 'kalman_qtau', 'kalman_qmin', 'kalman_eta',
                'kalman_etatau', 'kalman_etamax'
            ]
        }

        def str_formatify(string):
            """Convert to float if the text has a '.' or an 'e', else to int."""
            return float(string) if '.' in string \
                                    or 'e' in string else int(string)

        def sorted_unique(column, dtype=float):
            """Return the unique values of a column, cast to dtype and sorted."""
            # Builtin float/int replace the np.float/np.int aliases, which
            # were removed from NumPy.
            return np.sort(np.array(np.unique(column), dtype=dtype)).tolist()

        param = {}
        with open(filename, 'r') as f:
            lines = f.readlines()
        # One row per line, one column per whitespace-separated token; the
        # first column holds the keyword.
        df = pd.DataFrame([line.split() for line in lines if "#" not in line])
        self.elements = sorted(
            [
                element for element in np.ravel(df[df[0] == 'elements'])[1:]
                if element is not None
            ],
            key=Element)

        atom_energy_rows = np.array(df[df[0] == 'atom_energy'])
        atom_energy = {
            atom: float(energy)
            for atom, energy in zip(atom_energy_rows[:, 1],
                                    atom_energy_rows[:, 2])
        }
        param.update({'atom_energy': atom_energy})
        for tag in PARAMS.get('general'):
            if tag == 'scale_features':
                # NOTE(review): the two branches yield different types
                # ('1' vs 0); kept unchanged because callers are not visible.
                scale_features = '1' \
                    if len(df[df[0] == 'scale_symmetry_functions']) != 0 else 0
                param.update({'scale_features': scale_features})
            elif tag == 'hidden_layers':
                hidden_layers = [
                    int(neuron) for neuron in np.array(df[
                        df[0] == 'global_nodes_short'])[0][1:] if neuron
                ]
                param.update({'hidden_layers': hidden_layers})
                activations = np.array(
                    df[df[0] == 'global_activation_short'])[0][1]
                param.update({'activations': activations})
            else:
                value = str_formatify(np.array(df[df[0] == tag])[0][1])
                param.update({tag: value})
        if len(df[df[0] == 'normalize_nodes']) != 0:
            param.update({'normalize_nodes': True})

        for tag in PARAMS.get('additional'):
            value = str_formatify(np.array(df[df[0] == tag])[0][1])
            param.update({tag: value})

        # Symmetry-function lines are split by the value of their third
        # token: '2' feeds the r_* parameters, '3' the a_etas/lambdas/zetas.
        is_symfunction = df[0] == 'symfunction_short'
        type2_rows = df[is_symfunction & (df[2] == '2')]
        type3_rows = df[is_symfunction & (df[2] == '3')]

        r_cut = np.sort(np.array(type2_rows[6], dtype=float))[0]
        r_cut = float('{:.1f}'.format(r_cut * units.bohr_to_angstrom))
        param.update({'r_cut': r_cut})
        r_etas = sorted_unique(type2_rows[4])
        param.update({'r_etas': r_etas})
        r_shift = [
            float('{:.1f}'.format(r * units.bohr_to_angstrom))
            for r in sorted_unique(type2_rows[5])
        ]
        param.update({'r_shift': r_shift})
        a_etas = sorted_unique(type3_rows[5])
        param.update({'a_etas': a_etas})
        lambdas = sorted_unique(type3_rows[6], dtype=int)
        param.update({'lambdas': lambdas})
        zetas = sorted_unique(type3_rows[7])
        param.update({'zetas': zetas})
        # One (r_eta, r_shift) set per element plus one
        # (a_eta, lambda, zeta) set per unordered element pair.
        self.num_symm_functions = \
            sum([len(list(itertools.product(r_etas, r_shift))) for _ in self.elements]) + \
            sum([len(list(itertools.product(a_etas, lambdas, zetas)))
                 for _, _ in itertools.combinations_with_replacement(self.elements, 2)])
        self.layer_sizes = [self.num_symm_functions] + hidden_layers
        self.param = param
Exemplo n.º 24
0
    def train(self,
              train_structures,
              train_energies,
              train_forces,
              train_stresses=None,
              default_sigma=[0.0005, 0.1, 0.05, 0.01],
              use_energies=True,
              use_forces=True,
              use_stress=False,
              **kwargs):
        """
        Fit a GAP model by running the external ``gap_fit`` executable.

        The training configurations are written to ``train.xyz`` inside a
        scratch directory, ``gap_fit`` is run there with a ``soap`` descriptor,
        and the resulting ``train.xml`` is parsed back into ``self.param``
        (keys ``'xml'``, ``'element_param'`` and ``'potential_label'``).
        ``self.elements`` is set to the species found in the training
        structures, sorted by ``Element``.

        Args:
            train_structures ([Structure]): List of pymatgen Structure objects.
            train_energies ([float]): List of total energies of each structure
                in the structures list.
            train_forces ([np.array]): List of (m, 3) forces array of each
                structure with m atoms in the structures list. m can vary
                from structure to structure.
            train_stresses (list): List of (6, ) virial stresses of each
                structure in the structures list.
            default_sigma (list): Error criteria in energies, forces, stress
                and hessian. Must have exactly 4 numbers. The argument is only
                read, never modified.
            use_energies (bool): Whether to use dft total energies for training.
                Default to True.
            use_forces (bool): Whether to use dft atomic forces for training.
                Default to True.
            use_stress (bool): Whether to use dft virial stress for training.
                Default to False.

            kwargs:
                Any option below that is omitted (or passed as None) falls
                back to the value stored in ``soap_params``. Explicit falsy
                values such as ``0`` or ``0.0`` are honoured.

                l_max (int): Parameter to configure GAP. The band limit of
                    spherical harmonics basis function. Default to 12.
                n_max (int): Parameter to configure GAP. The number of radial
                    basis function. Default to 10.
                atom_sigma (float): Parameter to configure GAP. The width of
                    gaussian atomic density. Default to 0.5.
                zeta (float): Present when covariance function type is dot
                    product. Default to 4.
                cutoff (float): Parameter to configure GAP. The cutoff radius.
                    Default to 4.0.
                cutoff_transition_width (float): Parameter to configure GAP.
                    The transition width of cutoff radial. Default to 0.5.
                delta (float): Parameter to configure Sparsification.
                    The signal variance of noise. Default to 1.
                f0 (float): Parameter to configure Sparsification.
                    The signal mean of noise. Default to 0.0.
                n_sparse (int): Parameter to configure Sparsification.
                    Number of sparse points.
                covariance_type (str): Parameter to configure Sparsification.
                    The type of covariance function. Default to dot_product.
                sparse_method (str): Method to perform clustering in
                    sparsification. Default to 'cur_points'.

                sparse_jitter (float): Intrinsic error of atomic/bond energy,
                    used to regularise the sparse covariance matrix.
                    Default to 1e-8.
                e0 (float): Atomic energy value to be subtracted from energies
                    before fitting. Default to 0.0.
                e0_offset (float): Offset of baseline. If zero, the offset is
                    the average atomic energy of the input data or the e0
                    specified manually. Default to 0.0.

        Returns:
            int: The return code of ``gap_fit``. A non-zero code raises, so a
                returned value is always 0.

        Raises:
            RuntimeError: If ``gap_fit`` is not found, or if it exits with a
                non-zero return code.
            ValueError: If ``default_sigma`` does not contain 4 numbers.
        """
        if not which('gap_fit'):
            # Adjacent literals (no commas) so the exception carries ONE
            # readable message rather than a tuple of three fragments.
            raise RuntimeError(
                "gap_fit has not been found.\n"
                "Please refer to https://github.com/libAtoms/QUIP for "
                "further detail.")

        train_structures, train_forces, train_stresses = \
            check_structures_forces_stresses(train_structures, train_forces, train_stresses)

        # Distinct species symbols over all training structures; the final
        # order is defined solely by the Element sort key.
        species_symbols = {str(specie)
                           for struct in train_structures
                           for specie in struct.species}
        self.elements = sorted(species_symbols, key=lambda x: Element(x))

        atoms_filename = 'train.xyz'
        xml_filename = 'train.xml'
        train_pool = pool_from(train_structures, train_energies, train_forces,
                               train_stresses)

        gap_configure_params = [
            'l_max', 'n_max', 'atom_sigma', 'zeta', 'cutoff',
            'cutoff_transition_width', 'delta', 'f0', 'n_sparse',
            'covariance_type', 'sparse_method'
        ]
        preprocess_params = ['sparse_jitter', 'e0', 'e0_offset']
        if len(default_sigma) != 4:
            raise ValueError(
                "The default sigma is supposed to have 4 numbers.")

        def resolve(param_name):
            # Only a missing/None option falls back to soap_params; testing
            # truthiness would silently drop legitimate values like 0.0.
            value = kwargs.get(param_name)
            if value is None:
                value = soap_params.get(param_name)
            return value

        exe_command = ["gap_fit", 'at_file={}'.format(atoms_filename)]

        # Descriptor options are grouped in a single "gap={soap k=v ...}" token.
        gap_command = ['soap']
        gap_command.extend('{}={}'.format(name, resolve(name))
                           for name in gap_configure_params)
        gap_command.append('add_species=T')
        exe_command.append('gap={' + ' '.join(gap_command) + '}')

        exe_command.extend('{}={}'.format(name, resolve(name))
                           for name in preprocess_params)

        sigma_text = ' '.join(str(sigma) for sigma in default_sigma)
        exe_command.append('default_sigma={' + sigma_text + '}')

        if use_energies:
            exe_command.append('energy_parameter_name=dft_energy')
        if use_forces:
            exe_command.append('force_parameter_name=dft_force')
        if use_stress:
            exe_command.append('virial_parameter_name=dft_virial')
        exe_command.append('gp_file={}'.format(xml_filename))

        with ScratchDir('.'):
            self.write_cfgs(filename=atoms_filename, cfg_pool=train_pool)

            p = subprocess.Popen(exe_command, stdout=subprocess.PIPE)
            stdout = p.communicate()[0]
            rc = p.returncode
            if rc != 0:
                error_msg = 'gap_fit exited with return code %d' % rc
                output_lines = stdout.decode("utf-8").split('\n')[:-1]
                first_error = next(
                    (i for i, text in enumerate(output_lines)
                     if text.startswith('ERROR')), None)
                if first_error is not None:
                    # Report everything from the first ERROR line onwards.
                    detail = ', '.join(output_lines[first_error:])
                elif output_lines:
                    detail = output_lines[-1]
                else:
                    # gap_fit printed nothing: still raise RuntimeError
                    # instead of failing on an empty list.
                    detail = ''
                if detail:
                    error_msg += ': ' + detail
                raise RuntimeError(error_msg)

            def get_xml(xml_file):
                """Parse the gap_fit XML output.

                Returns the ElementTree, a dict mapping element symbol to the
                array loaded from that element's ``sparseX_filename`` (as
                nested lists), and the tag of the XML root element.
                """
                tree = ET.parse(xml_file)
                root = tree.getroot()
                potential_label = root.tag
                element_param = {}
                # The atomic number is embedded in the descriptor text as
                # " Z=<number> ".
                Z_pattern = re.compile(' Z=(.*?) ', re.S)
                for gpcoordinates in list(root.iter('gpCoordinates')):
                    gp_descriptor = list(gpcoordinates.iter('descriptor'))[0]
                    gp_descriptor_text = gp_descriptor.findtext('.')
                    element = str(
                        get_el_sp(int(
                            Z_pattern.findall(gp_descriptor_text)[0])))
                    param = np.loadtxt(gpcoordinates.get('sparseX_filename'))
                    element_param[element] = param.tolist()

                return tree, element_param, potential_label

            # Must run inside the scratch directory: the sparseX files named
            # in the XML are loaded by get_xml from the current directory.
            tree, element_param, potential_label = get_xml(xml_filename)
            self.param['xml'] = tree
            self.param['element_param'] = element_param
            self.param['potential_label'] = potential_label

        return rc
Exemplo n.º 25
0
def get_wf_hubbard_hund_linresp(structure,
                                user_incar_settings=None,
                                relax_nonmagnetic=True,
                                spin_polarized=True,
                                applied_potential_range=(-0.2, 0.2),
                                num_evals=9,
                                site_indices_perturb=None,
                                species_perturb=None,
                                find_nearest_sites=True,
                                parallel_scheme=0,
                                ediff_tight=None,
                                c=None):
    """
    Compute Hubbard U (and Hund J) on-site interaction values using GGA+U
    linear response method proposed by Cococcioni et al.
    (DOI: 10.1103/PhysRevB.71.035105)
    and the spin-polarized response formalism developed by Linscott et al.
    (DOI: 10.1103/PhysRevB.98.235157)

    This workflow relies on the constrained on-site potential functional
    implemented in VASP, with a helpful tutorial found here:
    https://www.vasp.at/wiki/index.php/Calculate_U_for_LSDA%2BU

    Note that ``structure`` is modified in place: the perturbed sites are
    removed and re-inserted one by one at index 0, so they end up at the
    front of the structure (the last selected site comes first).

    Args:
        structure: Ordered input structure (modified in place, see above).
        user_incar_settings: User INCAR settings.
        relax_nonmagnetic: Restart magnetic SCF runs from non-magnetic
            calculation, using WAVECAR.
        spin_polarized: Perform spin-dependent perturbations.
        applied_potential_range: Bounds of applied potential.
        num_evals: Number of perturbation evaluations. A value of exactly
            0.0 is dropped from the evaluated potentials.
        site_indices_perturb: (must specify if species_perturb=None)
            List of site indices within Structure indicating perturbation
            sites. The caller's list is not modified. If neither this nor
            species_perturb is given, site 0 is used and a warning is logged.
        species_perturb: (must specify if site_indices_perturb=None)
            List of names of species (string) of sites to perturb; the first
            not-yet-selected site of each species is chosen.
        find_nearest_sites: If set to true and species_perturb != None,
            the closest sites (by the Structure distance matrix) will be
            selected in the response analysis to account for inter-site
            screening effects.
        parallel_scheme: 0 - (default) self-consistent (SCF) runs use WAVECAR
            from non-self consistent (NSCF) run at same applied potential;
            1 - SCF runs use WAVECAR from ground-state (V=0) run.
            While reusing the WAVECAR from NSCF run in SCF run may be more
            efficient (parallel_scheme: 0), the user may also choose to
            remove the dependency between NSCF and SCF runs
            (parallel_scheme: 1).
        ediff_tight: Final energy convergence tolerance, if restarting from
            a previous run (if not specified, 0.1 times the EDIFF of the
            LDAU input set is used).
        c: Workflow config dict, in the same format as in presets/core.py
            and elsewhere in atomate. User-supplied entries take precedence
            over the built-in defaults; the dict itself is not modified.

    Returns: Workflow

    Raises:
        ValueError: If the structure is disordered, or a requested species
            cannot be found in the structure.
    """

    if not structure.is_ordered:
        raise ValueError(
            "Please obtain an ordered approximation of the input structure.")

    # Private copy: indices may be appended below and the caller's list must
    # not change as a side effect.
    site_indices_perturb = list(site_indices_perturb) \
        if site_indices_perturb else []

    if species_perturb:

        if find_nearest_sites:
            site_indices_perturb = find_closest_sites(structure,
                                                      species_perturb)
        else:
            for specie_u in species_perturb:
                target = Element(specie_u)
                for s, site in enumerate(structure):
                    if Element(str(site.specie)) == target \
                       and s not in site_indices_perturb:
                        site_indices_perturb.append(s)
                        break
                else:
                    # No unused site of this species is left.
                    raise ValueError("Could not find specie(s) in structure.")

    elif not site_indices_perturb:
        logger.warning("Sites for computing U value are not specified. "
                       "Computing U for first site in structure. ")
        # Actually select the first site, as the warning announces; leaving
        # the list empty would build a workflow with no perturbation at all.
        site_indices_perturb = [0]

    site_indices_perturb = list(site_indices_perturb)
    num_perturb = len(site_indices_perturb)

    # Grab the site objects before removal invalidates the indices.
    sites_perturb = [structure[index] for index in site_indices_perturb]

    structure.remove_sites(indices=site_indices_perturb)

    for site in sites_perturb:
        structure.insert(i=0,
                         species=site.specie,
                         coords=site.frac_coords,
                         properties=site.properties)

    # using a uuid for book-keeping,
    # in a similar way to other workflows
    wf_uuid = str(uuid4())

    # Defaults first, then the user's config on top, so user values win and
    # the caller's dict is left untouched.
    c_merged = {"vasp_cmd": VASP_CMD, "db_file": DB_FILE}
    if c:
        c_merged.update(c)
    c = c_merged

    # Calculate groundstate

    # set user_incar_settings
    if not user_incar_settings:
        user_incar_settings = {}

    # setup VASP input sets
    uis_gs, uis_ldau, val_dict, vis_ldau = init_linresp_input_sets(
        user_incar_settings, structure, num_perturb)

    fws = []
    index_fw_gs = [0]

    ediff_default = vis_ldau.incar['EDIFF']
    if not ediff_tight:
        ediff_tight = 0.1 * ediff_default

    append_linresp_ground_state_fws(fws, structure, num_perturb, index_fw_gs,
                                    uis_gs, relax_nonmagnetic, ediff_default,
                                    ediff_tight)

    # generate list of applied on-site potentials in linear response;
    # the grid is the same for every perturbed site, so build it once
    applied_potential_values = np.around(
        np.linspace(applied_potential_range[0],
                    applied_potential_range[1],
                    num_evals),
        decimals=9)
    # V = 0 is excluded from the perturbation grid
    applied_potential_values = applied_potential_values[
        applied_potential_values != 0.0]

    for counter_perturb in range(num_perturb):

        for v in applied_potential_values:

            append_linresp_perturb_fws(v, fws, structure, counter_perturb,
                                       num_perturb, index_fw_gs, uis_ldau,
                                       val_dict, spin_polarized,
                                       relax_nonmagnetic, ediff_default,
                                       ediff_tight, parallel_scheme)

    wf = Workflow(fws)

    # NOTE(review): the analysis task is given the module-level DB_FILE, not
    # c["db_file"] -- confirm whether a user-supplied db_file should apply.
    fw_analysis = Firework(
        HubbardHundLinRespToDb(num_perturb=num_perturb,
                               spin_polarized=spin_polarized,
                               relax_nonmagnetic=relax_nonmagnetic,
                               db_file=DB_FILE,
                               wf_uuid=wf_uuid),
        name="HubbardHundLinRespToDb",
    )

    wf.append_wf(Workflow.from_Firework(fw_analysis), wf.leaf_fw_ids)

    wf = add_common_powerups(wf, c)

    if c.get("ADD_WF_METADATA", ADD_WF_METADATA):
        wf = add_wf_metadata(wf, structure)

    wf = add_additional_fields_to_taskdocs(
        wf,
        {
            "wf_meta": {
                "wf_uuid": wf_uuid,
                "wf_name": "hubbard_hund_linresp",
                "wf_version": __hubbard_hund_linresp_wf_version__,
            }
        },
    )

    return wf
Exemplo n.º 26
0
    def displacement_energies(self, structure, potential, displacement_energies_schema,
                              supercell=(1, 1, 1), tollerance=0.1,
                              max_displacement_energy=75, resolution=1,
                              num_steps=1000, site_radius=0.5, timestep=0.001):
        """ Calculate displacement energy for each atom.

        Uses bisection method to determine displacement energy: the selected
        atom is given an initial kinetic energy along a direction, a
        calculation is submitted to ``self.calculator``, and the energy
        interval is halved depending on whether every atom ended up within
        ``site_radius`` of its initial position.

        Args:
            structure: Base structure; it is copied and multiplied by
                ``supercell`` for every schema entry.
            potential: Passed through unchanged to ``self.calculator.submit``.
            displacement_energies_schema (dict): Maps a name to a dict with
                keys ``'direction'`` and ``'position'`` (both converted with
                the unit-cell lattice's ``get_cartesian_coords``) and
                ``'element'`` (symbol used to look up the atomic mass).
            supercell: Scaling applied to the copied structure.
            tollerance (float): Radius used to locate the site at
                ``'position'``; the first site found is the one displaced.
            max_displacement_energy (float): Upper bound of the bisection
                interval (the lower bound is 0.0).
            resolution (float): Bisection stops once the interval is no wider
                than this.
            num_steps (int): Number of steps of each run.
            site_radius (float): Maximum displacement for an atom to count as
                still being on its original site.
            timestep (float): Timestep handed to the calculator.

        Returns:
            dict: name -> last energy tried by the bisection, or ``None`` when
                the initial interval is already within ``resolution``.

        Raises:
            ValueError: If ``self.calculator_type`` is neither ``'lammps'``
                nor ``'lammps_cython'``.
        """
        def ev2Aps(mass, energy):
            # sqrt((2 * energy[eV] [J/eV]) / (amu [g/mole] [kg/g])) * [m/s] [A/ps]
            return math.sqrt((2 * energy * 1.6021766208e-19) / (mass / (6.02214085e23 * 1e3))) * 1e-2

        if self.calculator_type == 'lammps':
            logger.warning('"lammps" calculator is depriciated use "lammps_cython" cannot promise working')
            relax_lammps_script = load_lammps_set('nve')
            relax_lammps_script['thermo'] = []
            # NOTE(review): the original comment labelled this "fs" while the
            # velocity helper works in A/ps -- confirm the intended units.
            relax_lammps_script['timestep'] = timestep
            relax_lammps_script['run'] = num_steps
            kwargs = {'lammps_set': relax_lammps_script}
        elif self.calculator_type == 'lammps_cython':
            kwargs = {'lammps_additional_commands': [
                'timestep %f' % timestep,
                'velocity all zero linear',
                'fix 1 all nve',
                'run %d' % num_steps
            ]}
        else:
            # Fail early with a clear message instead of an unbound-variable
            # error once the first calculation is submitted.
            raise ValueError(
                'unsupported calculator type for displacement energies: %r'
                % (self.calculator_type,))

        async def calculate(md_structure):
            future = await self.calculator.submit(
                md_structure, potential,
                properties={'positions', 'initial_positions'},
                **kwargs)
            await future
            return future.result()

        energies = {}
        for displacement_energy_name, d in displacement_energies_schema.items():
            base_structure = structure.copy()
            # Direction and position are converted with the unit-cell lattice,
            # before the supercell is built.
            v = base_structure.lattice.get_cartesian_coords(d['direction'])
            cart_coords = base_structure.lattice.get_cartesian_coords(d['position'])
            base_structure = base_structure * supercell
            site = base_structure.get_sites_in_sphere(cart_coords, tollerance)[0][0]
            index = base_structure.index(site)

            min_energy, max_energy = 0.0, max_displacement_energy
            guess_energy = None
            while abs(max_energy - min_energy) > resolution:
                guess_energy = (max_energy - min_energy) / 2 + min_energy
                speed = ev2Aps(Element(d['element']).atomic_mass, guess_energy)
                # Only the selected atom moves initially; all others start at rest.
                velocities = np.zeros((len(base_structure), 3))
                velocities[index] = (v / np.linalg.norm(v)) * speed
                base_structure.add_site_property('velocities', velocities)

                print('starting calculation (displacement energy): %s ion %s velocity: %f [eV] %f [A/ps]' % (displacement_energy_name, d['element'], guess_energy, speed))
                result = self._run_async_func(calculate(base_structure))
                initial_frac_positions = base_structure.lattice.get_fractional_coords(result['results']['initial_positions'])
                final_frac_positions = base_structure.lattice.get_fractional_coords(result['results']['positions'])
                # Per-atom displacement length, using pbc_diff on the
                # fractional coordinates before converting to cartesian.
                displacements = np.linalg.norm(
                    base_structure.lattice.get_cartesian_coords(
                        pbc_diff(final_frac_positions, initial_frac_positions)), axis=1)
                is_original_state = np.all(displacements < site_radius)
                print('finished calculation (displacement energy): %s resulted in ground_state (%s) max displacment %f [A] median %f [A] min %f [A]' % (displacement_energy_name, is_original_state, np.max(displacements), np.median(displacements), np.min(displacements)))
                if is_original_state:
                    # Every atom stayed on its site: the threshold is higher.
                    min_energy = guess_energy
                else:
                    max_energy = guess_energy

            energies[displacement_energy_name] = guess_energy
        return energies
Exemplo n.º 27
0
# Print the chemical potential of a key element in every facet of a
# phase diagram, followed by the transition chemical potentials.
import numpy as np
from pymatgen.analysis.phase_diagram import PhaseDiagram
from pymatgen.core import Composition, Element
from pymatgen.ext.matproj import MPRester

key_element = 'Li'
target_phase = 'BeO'

# Chemical system made of the key element plus the elements of the target phase.
chemsys = key_element + '-' + Composition(target_phase).chemical_system
# Fill in a Materials Project API key before running.
with MPRester(api_key='') as mpr:
    entries = mpr.get_entries_in_chemsys(chemsys)

pd = PhaseDiagram(entries)

for facet in pd.facets:
    # A ready-made alternative exists: chempots = pd._get_facet_chempots(facet)
    # It is re-implemented here step by step.
    namelist = [pd.qhull_entries[i].name for i in facet]
    complist = [pd.qhull_entries[i].composition for i in facet]
    energylist = [pd.qhull_entries[i].energy_per_atom for i in facet]
    # One row per facet entry: atomic fraction of each phase-diagram element.
    m = [[c.get_atomic_fraction(e) for e in pd.elements] for c in
            complist]
    # solve a linear matrix equation, sum(μ*n) = E
    chempots = np.linalg.solve(m, energylist)
    chempots = dict(zip(pd.elements, chempots))
    mu = chempots[Element(key_element)]
    # print chemical potential of key element in each equilibrium
    print('-'.join(namelist)+': '+str(np.round(mu,3)))

# print transition points of phase equilibria in terms of chemical potential
chempotlist = np.round(pd.get_transition_chempots(Element(key_element)), 3)
print(chempotlist)
Exemplo n.º 28
0
    def get_atom_feature(
            self,
            mol,
            atom  # type: ignore
    ) -> Dict:  # type: ignore
        """
        Build the feature dictionary of a single atom.

        Args:
            mol (pybel.Molecule): Molecule that contains the atom
            atom (pybel.Atom): Atom whose features are collected
        Return:
            (dict): Feature name -> value. "chirality" and "ring_sizes" are
                only present when listed in ``self.atom_features``.
        """

        # OpenBabel object behind the pybel atom
        ob_atom = atom.OBAtom
        # pybel counts atoms from 1; the chiral-centre lookup uses idx - 1
        zero_based_idx = atom.idx - 1

        atomic_number = ob_atom.GetAtomicNum()
        symbol = Element.from_Z(atomic_number).symbol

        # Cheap per-atom properties, filled in a fixed key order
        features = {}
        features["element"] = symbol
        features["atomic_num"] = atomic_number
        features["formal_charge"] = ob_atom.GetFormalCharge()
        if symbol == "H":
            # hydrogen gets the fixed hybridization code 6
            features["hybridization"] = 6
        else:
            features["hybridization"] = ob_atom.GetHyb()
        features["acceptor"] = ob_atom.IsHbondAcceptor()
        if atom.type == "H":
            features["donor"] = ob_atom.IsHbondDonorH()
        else:
            features["donor"] = ob_atom.IsHbondDonor()
        features["aromatic"] = ob_atom.IsAromatic()
        features["coordid"] = atom.coordidx

        # Optional: chirality code (0 = not a chiral centre, 1 = 'R', 2 = other)
        if "chirality" in self.atom_features:
            chiral_centers = self._get_chiral_centers(mol)
            if zero_based_idx in chiral_centers:
                is_r = chiral_centers[zero_based_idx] == "R"
                features["chirality"] = 1 if is_r else 2
            else:
                features["chirality"] = 0

        # Optional: sizes of all SSSR rings that contain this atom
        if "ring_sizes" in self.atom_features:
            # OpenBabel caches ring computation internally, no need to cache ourselves
            sizes = []
            for ring in mol.OBMol.GetSSSR():
                if ring.IsInRing(atom.idx):
                    sizes.append(ring.Size())
            features["ring_sizes"] = sizes

        return features
Exemplo n.º 29
0
    def _parse(self, filename):
        start_patt = re.compile(" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(" #[pPnNtT]*.*")
        link0_patt = re.compile("^\s(%.+)\s*=\s*(.+)")
        charge_mul_patt = re.compile("Charge\s+=\s*([-\\d]+)\s+"
                                     "Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile("([0-9]+)\s+basis functions")
        num_elec_patt = re.compile(
            "(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
        pcm_patt = re.compile("Polarizable Continuum Model")
        stat_type_patt = re.compile("imaginary frequencies")
        scf_patt = re.compile("E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile("EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile("ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile("(Normal|Error) termination")
        error_patt = re.compile(
            "(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile(
            "^\s*(Mulliken charges|Mulliken atomic charges)")
        mulliken_charge_patt = re.compile('^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
        end_mulliken_patt = re.compile(
            '(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
        std_orientation_patt = re.compile("Standard orientation")
        end_patt = re.compile("--+")
        orbital_patt = re.compile("(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile("(Zero-point|Thermal) correction(.*)="
                                 "\s+([\d\.-]+)")
        forces_on_patt = re.compile(
            "Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
        forces_off_patt = re.compile("Cartesian\s+Forces:\s+Max.*RMS.*")
        forces_patt = re.compile(
            "\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

        freq_on_patt = re.compile(
            "Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
        freq_patt = re.compile("Frequencies\s--\s+(.*)")
        normal_mode_patt = re.compile(
            "\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

        mo_coeff_patt = re.compile("Molecular Orbital Coefficients:")
        mo_coeff_name_patt = re.compile(
            "\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}
        self.link0 = {}
        self.cart_forces = []
        self.frequencies = []
        self.eigenvalues = []
        self.is_spin = False

        coord_txt = []
        read_coord = 0
        read_mulliken = False
        read_eigen = False
        eigen_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False
        parse_forces = False
        forces = []
        parse_freq = False
        frequencies = []
        read_mo = False

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif link0_patt.match(line):
                        m = link0_patt.match(line)
                        self.link0[m.group(1)] = m.group(2)
                    elif route_patt.search(line):
                        params = read_route_line(line)
                        self.functional = params[0]
                        self.basis_set = params[1]
                        self.route = params[2]
                        self.dieze_tag = params[3]
                        parse_stage = 1
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                for l in coord_txt[2:]:
                                    toks = l.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append(
                                        [float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))

                    if parse_forces:
                        m = forces_patt.search(line)
                        if m:
                            forces.extend(
                                [float(_v) for _v in m.groups()[2:5]])
                        elif forces_off_patt.search(line):
                            self.cart_forces.append(forces)
                            forces = []
                            parse_forces = False

                    # read molecular orbital eigenvalues
                    if read_eigen:
                        m = orbital_patt.search(line)
                        if m:
                            eigen_txt.append(line)
                        else:
                            read_eigen = False
                            self.eigenvalues = {Spin.up: []}
                            for eigenline in eigen_txt:
                                if "Alpha" in eigenline:
                                    self.eigenvalues[Spin.up] += [
                                        float(e)
                                        for e in float_patt.findall(eigenline)
                                    ]
                                elif "Beta" in eigenline:
                                    if Spin.down not in self.eigenvalues:
                                        self.eigenvalues[Spin.down] = []
                                    self.eigenvalues[Spin.down] += [
                                        float(e)
                                        for e in float_patt.findall(eigenline)
                                    ]
                            eigen_txt = []

                    # read molecular orbital coefficients
                    if read_mo:
                        # build a matrix with all coefficients
                        all_spin = [Spin.up]
                        if self.is_spin:
                            all_spin.append(Spin.down)

                        mat_mo = {}
                        for spin in all_spin:
                            mat_mo[spin] = np.zeros(
                                (self.num_basis_func, self.num_basis_func))
                            nMO = 0
                            end_mo = False
                            while nMO < self.num_basis_func and not end_mo:
                                f.readline()
                                f.readline()
                                self.atom_basis_labels = []
                                for i in range(self.num_basis_func):
                                    line = f.readline()

                                    # identify atom and OA labels
                                    m = mo_coeff_name_patt.search(line)
                                    if m.group(1).strip() != "":
                                        iat = int(m.group(2)) - 1
                                        # atname = m.group(3)
                                        self.atom_basis_labels.append(
                                            [m.group(4)])
                                    else:
                                        self.atom_basis_labels[iat].append(
                                            m.group(4))

                                    # MO coefficients
                                    coeffs = [
                                        float(c)
                                        for c in float_patt.findall(line)
                                    ]
                                    for j in range(len(coeffs)):
                                        mat_mo[spin][i, nMO + j] = coeffs[j]

                                nMO += len(coeffs)
                                line = f.readline()
                                # manage pop=regular case (not all MO)
                                if nMO < self.num_basis_func and \
                                    ("Density Matrix:" in line or mo_coeff_patt.search(line)):
                                    end_mo = True
                                    warnings.warn(
                                        "POP=regular case, matrix coefficients not complete"
                                    )
                            f.readline()

                        self.eigenvectors = mat_mo
                        read_mo = False

                        # build a more convenient array dict with MO coefficient of
                        # each atom in each MO.
                        # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                        mo = {}
                        for spin in all_spin:
                            mo[spin] = [[
                                {}
                                for iat in range(len(self.atom_basis_labels))
                            ] for j in range(self.num_basis_func)]
                            for j in range(self.num_basis_func):
                                i = 0
                                for iat in range(len(self.atom_basis_labels)):
                                    for label in self.atom_basis_labels[iat]:
                                        mo[spin][j][iat][
                                            label] = self.eigenvectors[spin][i,
                                                                             j]
                                        i += 1

                        self.molecular_orbital = mo

                    elif parse_freq:
                        m = freq_patt.search(line)
                        if m:
                            values = [
                                float(_v) for _v in m.groups()[0].split()
                            ]
                            for value in values:
                                frequencies.append([value, []])
                        elif normal_mode_patt.search(line):
                            values = [float(_v) for _v in line.split()[2:]]
                            n = int(len(values) / 3)
                            for i in range(0, len(values), 3):
                                j = -n + int(i / 3)
                                frequencies[j][1].extend(values[i:i + 3])
                        elif line.find("-------------------") != -1:
                            parse_freq = False
                            self.frequencies.append(frequencies)
                            frequencies = []

                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                            terminated = True
                    elif error_patt.search(line):
                        error_defs = {
                            "! Non-Optimized Parameters !": "Optimization "
                            "error",
                            "Convergence failure": "SCF convergence error"
                        }
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and \
                            num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif num_elec_patt.search(line):
                        m = num_elec_patt.search(line)
                        self.electrons = (int(m.group(1)), int(m.group(2)))
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and \
                            stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        self.energies.append(
                            float(m.group(1).replace("D", "E")))
                    elif oniom_patt.search(line):
                        m = oniom_patt.matcher(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif not read_eigen and orbital_patt.search(line):
                        eigen_txt.append(line)
                        read_eigen = True
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = True
                    elif not parse_forces and forces_on_patt.search(line):
                        parse_forces = True
                    elif freq_on_patt.search(line):
                        parse_freq = True
                    elif mo_coeff_patt.search(line):
                        if "Alpha" in line:
                            self.is_spin = True
                        read_mo = True

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            m = end_mulliken_patt.search(line)
                            mulliken_charges = {}
                            for line in mulliken_txt:
                                if mulliken_charge_patt.search(line):
                                    m = mulliken_charge_patt.search(line)
                                    dict = {
                                        int(m.group(1)):
                                        [m.group(2),
                                         float(m.group(3))]
                                    }
                                    mulliken_charges.update(dict)
                            read_mulliken = False
                            self.Mulliken_charges = mulliken_charges

        if not terminated:
            #raise IOError("Bad Gaussian output file.")
            warnings.warn("\n" + self.filename + \
                ": Termination error or bad Gaussian output file !")
Exemplo n.º 30
0
 def test_get_property(self):
     """Look up "AtomicWeight" for Element("Be") and compare it to 9.012182."""
     beryllium = Element("Be")
     reported_weight = self.data_source.get_elemental_property(
         beryllium, "AtomicWeight")
     self.assertAlmostEqual(9.012182, reported_weight)
Exemplo n.º 31
0
def Bk_symbol():
    """Return a one-item list with ``str()`` of the element of atomic number 97."""
    element_97 = Element.from_Z(97)
    symbols = [str(element_97)]
    return symbols
Exemplo n.º 32
0
    def process_item(self, item):
        """
        Build insertion-electrode summary documents from one work item.

        The entries in ``item['all_entries']`` are split into groups by
        ``self.get_sorted_subgroups``. Every entry is annotated in place with
        ``data['muO2']`` (oxygen element profile, or ``None`` when the entry
        contains no oxygen) and ``data['decomposition_energy']`` (energy above
        the hull of the phase diagram built from ``item['pd_ents']``). Then,
        for each sandbox name found in a group, an ``InsertionElectrode`` is
        built from the entries belonging to that sandbox or to ``['core']``.

        Side effect: ``self.working_ion_entry`` is set to the lowest
        energy-per-atom entry of ``pd_ents`` whose integer formula equals
        ``self.working_ion``.

        Args:
            item (dict): must provide the keys ``'all_entries'`` and
                ``'pd_ents'``.

        Returns:
            list: one dict per successfully built electrode, i.e. the
            electrode's ``as_dict_summary()`` extended with the keys
            ``'spacegroup'``, ``'battid'`` and ``'sbxn'``.

        Raises:
            ValueError: if no entry in ``pd_ents`` matches the working ion.
        """
        all_entries = item['all_entries']
        pd_ents = item['pd_ents']
        phdi = PhaseDiagram(pd_ents)

        # The working ion entries
        ents_wion = [
            ent for ent in pd_ents
            if ent.composition.get_integer_formula_and_factor()[0] ==
            self.working_ion
        ]
        if not ents_wion:
            # min() on an empty list would raise an opaque ValueError anyway;
            # raise the same exception type with an actionable message.
            raise ValueError(
                f"No entry for working ion {self.working_ion} in pd_ents")
        self.working_ion_entry = min(ents_wion,
                                     key=lambda e: e.energy_per_atom)

        grouped_entries = list(self.get_sorted_subgroups(all_entries))
        docs = []  # results

        for group in grouped_entries:
            self.logger.debug(
                f"Grouped entries in all sandboxes {', '.join([en.name for en in group])}"
            )
            for en in group:
                # the oxygen chemical-potential profile only makes sense for
                # entries that actually contain oxygen
                if Element('O') in en.composition.elements:
                    d_muO2 = [{
                        'reaction': str(itr['reaction']),
                        'chempot': itr['chempot'],
                        'evolution': itr['evolution']
                    } for itr in phdi.get_element_profile('O', en.composition)]
                else:
                    d_muO2 = None
                en.data['muO2'] = d_muO2
                en.data['decomposition_energy'] = phdi.get_e_above_hull(en)

            # sort out the sandboxes
            # for each sandbox core+sandbox will both contribute entries
            all_sbx = set(
                chain.from_iterable(ent.data['sbxn'] for ent in group))
            self.logger.debug(f"All sandboxes {', '.join(all_sbx)}")

            for isbx in all_sbx:
                group_sbx = [
                    ent for ent in group
                    if isbx in ent.data['sbxn'] or ent.data['sbxn'] == ['core']
                ]
                # Need more than one level of lithiation to define an
                # electrode material
                if len(group_sbx) < 2:
                    continue
                self.logger.debug(
                    f"Grouped entries in sandbox {isbx} -- {', '.join([en.name for en in group_sbx])}"
                )
                # Best-effort construction: any ordinary failure, or an
                # electrode with fewer than two stable entries, skips this
                # sandbox. The check is an explicit comparison (not an
                # assert) so it still runs under ``python -O``, and only
                # Exception is caught so Ctrl-C / SystemExit propagate.
                try:
                    result = InsertionElectrode(group_sbx,
                                                self.working_ion_entry)
                    usable = len(result._stable_entries) > 1
                except Exception:
                    usable = False
                if not usable:
                    self.logger.warning(
                        f"Not able to generate a  entries in sandbox {isbx} using the following entires-- {', '.join([en.entry_id for en in group_sbx])}"
                    )
                    continue

                spacegroup = SpacegroupAnalyzer(
                    result.get_stable_entries(
                        charge_to_discharge=True)[0].structure)
                d = result.as_dict_summary()
                ids = [entry.entry_id for entry in result.get_all_entries()]
                # NOTE(review): the suffix after the last '-' is compared as
                # a string, so the ordering is lexicographic ("1000" < "999").
                # Confirm whether a numeric comparison is intended.
                lowest_id = min(ids, key=lambda x: x.split('-')[-1])
                d['spacegroup'] = {
                    k: spacegroup._space_group_data[k]
                    for k in sg_fields
                }

                if isbx == 'core':
                    d['battid'] = lowest_id + '_' + self.working_ion
                else:
                    d['battid'] = lowest_id + '_' + self.working_ion + '_' + isbx
                # Only allow one sandbox value for each electrode
                d['sbxn'] = [isbx]

                docs.append(d)

        return docs
Exemplo n.º 33
0
 def test_get_oxidation(self):
     """Expect the data source to report [-4, 2, 4] for Element("C")."""
     expected_states = [-4, 2, 4]
     carbon = Element("C")
     observed_states = self.data_source.get_oxidation_states(carbon)
     self.assertEqual(expected_states, observed_states)