示例#1
0
    def test_dict_to_object(self):
        """dict_to_object must turn serialized structures back into objects."""
        # Reference two-site Si structure.
        cell = Lattice([[3.8401979337, 0.00, 0.00],
                        [1.9200989668, 3.3257101909, 0.00],
                        [0.00, -2.2171384943, 3.1355090603]])
        site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        struct = Structure(cell, ["Si"] * 2, site_coords)

        # A single column holding two serialized copies of the structure.
        serialized = [struct.as_dict(), struct.as_dict()]
        df = DataFrame(data={'structure_dict': serialized})

        df["structure"] = dict_to_object(df["structure_dict"])
        rebuilt = df["structure"].tolist()
        self.assertEqual(rebuilt[0], struct)
        self.assertEqual(rebuilt[1], struct)
示例#2
0
    def test_dict_to_object(self):
        """Round-trip two structure dicts through dict_to_object."""
        lattice_rows = [[3.8401979337, 0.00, 0.00],
                        [1.9200989668, 3.3257101909, 0.00],
                        [0.00, -2.2171384943, 3.1355090603]]
        struct = Structure(Lattice(lattice_rows),
                           ["Si"] * 2,
                           [[0, 0, 0], [0.75, 0.5, 0.75]])
        df = DataFrame(
            data={'structure_dict': [struct.as_dict(), struct.as_dict()]})

        df["structure"] = dict_to_object(df["structure_dict"])

        # Both rows must compare equal to the original structure.
        for row in (0, 1):
            self.assertEqual(df["structure"].tolist()[row], struct)
示例#3
0
    def read_cfgs(self, filename, symbol):
        """
        Parse every ``BEGIN_CFG`` ... ``END_CFG`` block of a configuration file.

        Args:
            filename (str): The configuration file to be read.
            symbol (str): The element symbol assigned to every site.

        Returns:
            A 2-tuple: the list of per-configuration dicts (keys 'outputs',
            'structure' and 'num_atoms'; 'outputs' holds 'energy', 'forces'
            and 'virial_stress'), and the second item returned by
            ``convert_docs`` for that list.
        """
        def to_floats(text):
            # Whitespace-separated numbers -> list of floats.
            return [float(token) for token in text.split()]

        with zopen(filename, 'rt') as handle:
            contents = handle.read()

        cfg_re = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
        size_re = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
        lattice_re = re.compile('SuperCell\n(.*?)\n AtomData', re.S | re.I)
        atoms_re = re.compile('fz\n(.*?)\n Energy', re.S)
        energy_re = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)', re.S)
        stress_re = re.compile('xy\n(.*?)(?=\n|$)', re.S)

        configs = []
        for cfg in cfg_re.findall(contents):
            num_atoms = int(size_re.findall(cfg)[0].lstrip())

            lattice_rows = lattice_re.findall(cfg)[0].split('\n')
            lattice = Lattice(
                np.array([to_floats(row) for row in lattice_rows]))

            # One row per atom: columns 2-4 are used as coordinates and
            # columns 5-7 as forces.
            atom_rows = atoms_re.findall(cfg)[0].split('\n')
            atom_table = np.array([to_floats(row) for row in atom_rows])
            forces = atom_table[:, 5:8].tolist()
            cart_coords = atom_table[:, 2:5]

            energy = float(energy_re.findall(cfg)[0].lstrip())

            # Exactly six stress values are expected; reshape raises otherwise.
            stress_tokens = stress_re.findall(cfg)[0].split()
            raw_stress = np.array(
                [to_floats(token) for token in stress_tokens]
            ).reshape(6).tolist()
            # Re-order the components from the file's order to the VASP order.
            virial_stress = [
                raw_stress[self.mtp_stress_order.index(component)]
                for component in self.vasp_stress_order
            ]

            struct = Structure(lattice=lattice,
                               species=[symbol] * num_atoms,
                               coords=cart_coords,
                               coords_are_cartesian=True)
            struct_dict = struct.as_dict()
            assert num_atoms == struct.num_sites

            configs.append({
                'outputs': {
                    'energy': energy,
                    'forces': forces,
                    'virial_stress': virial_stress,
                },
                'structure': struct_dict,
                'num_atoms': num_atoms,
            })

        _, df = convert_docs(docs=configs)
        return configs, df
示例#4
0
    def test_conversion_multiindex_dynamic(self):
        """Dynamic target_col_id naming must also work with MultiIndex columns."""
        site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        cell = Lattice([[3.8401979337, 0.00, 0.00],
                        [1.9200989668, 3.3257101909, 0.00],
                        [0.00, -2.2171384943, 3.1355090603]])
        struct = Structure(cell, ["Si"] * 2, site_coords)

        serialized = [struct.as_dict(), struct.as_dict()]
        df_2lvl = DataFrame(data={'structure_dict': serialized})
        # Promote the flat columns to a two-level ("custom", <name>) index.
        df_2lvl.columns = MultiIndex.from_product(
            (["custom"], df_2lvl.columns.values))

        source_col = ('custom', 'structure_dict')
        converter = DictToObject()
        df_2lvl = converter.featurize_dataframe(df_2lvl, source_col,
                                                multiindex=True)

        # Column expected when target_col_id is left unset.
        expected_col = ('DictToObject', 'structure_dict_object')
        for row in (0, 1):
            self.assertEqual(df_2lvl[expected_col].tolist()[row], struct)
示例#5
0
    def test_dict_to_object(self):
        """DictToObject must rebuild structures for explicit and dynamic
        target column names."""
        cell = Lattice([[3.8401979337, 0.00, 0.00],
                        [1.9200989668, 3.3257101909, 0.00],
                        [0.00, -2.2171384943, 3.1355090603]])
        struct = Structure(cell, ["Si"] * 2, [[0, 0, 0], [0.75, 0.5, 0.75]])
        d = {'structure_dict': [struct.as_dict(), struct.as_dict()]}

        # (constructor kwargs, column expected to hold the converted objects)
        scenarios = (
            ({'target_col_id': 'structure'}, "structure"),
            # dynamic target_col_id setting
            ({}, "structure_dict_object"),
        )
        for init_kwargs, output_col in scenarios:
            df = DataFrame(data=d)
            dto = DictToObject(**init_kwargs)
            df = dto.featurize_dataframe(df, 'structure_dict')
            self.assertEqual(df[output_col].tolist()[0], struct)
            self.assertEqual(df[output_col].tolist()[1], struct)
示例#6
0
 def final_structure(self):
     """Return the final structure as a dict, or None if data is missing.

     None is returned when either ``self.final_unit_cell`` or
     ``self.final_atomic_positions`` is falsy. Otherwise each entry of
     ``final_atomic_positions`` supplies a label (item 0, digits stripped
     to obtain the species) and a position (item 1).
     """
     unit_cell = self.final_unit_cell
     atomic_positions = self.final_atomic_positions
     if not (unit_cell and atomic_positions):
         return None

     species = []
     positions = []
     for entry in atomic_positions:
         # Drop digits from the label, e.g. "Fe1" -> "Fe".
         species.append(''.join(ch for ch in entry[0] if not ch.isdigit()))
     for entry in atomic_positions:
         positions.append(entry[1])

     return Structure(unit_cell, species, positions).as_dict()
示例#7
0
    def read_cfgs(self, filename='output.data'):
        """
        Read configurations from a file made of ``begin`` ... ``end`` blocks.

        Within each block, ``lattice`` lines give the cell vectors, ``atom``
        lines give one site each (x, y, z, specie, charge, atomic_energy,
        fx, fy, fz) and the first ``energy`` line gives the total energy.
        Lengths are multiplied by ``self.bohr_to_angstrom``, energies are
        divided by ``self.eV_to_Ha`` and forces are divided by both.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            A 2-tuple: the list of per-configuration dicts (keys 'outputs',
            'structure' and 'num_atoms'; 'outputs' holds 'energy' and
            'forces', the latter as a numpy array), and the second item
            returned by ``convert_docs`` for that list.
        """
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()

        block_pattern = re.compile('begin\n(.*?)end', re.S)
        lattice_pattern = re.compile('lattice(.*?)\n')
        position_pattern = re.compile('atom(.*?)\n')
        energy_pattern = re.compile('energy(.*?)\n')

        for block in block_pattern.findall(lines):
            d = {'outputs': {}}
            lattice_str = lattice_pattern.findall(block)
            # The builtin ``float`` is used as dtype throughout: the
            # ``np.float`` alias was removed in NumPy 1.24.
            lattice = Lattice(
                np.array([latt.split() for latt in lattice_str],
                         dtype=float) * self.bohr_to_angstrom)
            position_str = position_pattern.findall(block)
            positions = pd.DataFrame([pos.split() for pos in position_str])
            positions.columns = \
                ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy', 'fx', 'fy', 'fz']
            coords = np.array(positions.loc[:, ['x', 'y', 'z']],
                              dtype=float)
            coords = coords * self.bohr_to_angstrom
            species = np.array(positions['specie'])
            forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']],
                              dtype=float)
            forces = forces / self.eV_to_Ha / self.bohr_to_angstrom
            energy_str = energy_pattern.findall(block)[0]
            energy = float(energy_str.lstrip()) / self.eV_to_Ha
            struct = Structure(lattice=lattice,
                               species=species,
                               coords=coords,
                               coords_are_cartesian=True)
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            d['outputs']['forces'] = forces
            d['num_atoms'] = len(struct)

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df
    def setUp(self):
        """Create one task document per input set and load them into an
        in-memory ``tasks`` store; also connect an empty ``task_types`` store."""
        site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        cell = [
            [3.8401979337, 0.00, 0.00],
            [1.9200989668, 3.3257101909, 0.00],
            [0.00, -2.2171384943, 3.1355090603],
        ]
        structure = Structure(cell, ["Si", "Si"], site_coords)

        input_sets = {
            "GGA Structure Optimization": MPRelaxSet(structure),
            "GGA Static": MPStaticSet(structure),
            "GGA NSCF Line": MPNonSCFSet(structure, mode="line"),
            "GGA NSCF Uniform": MPNonSCFSet(structure, mode="uniform"),
        }

        # Task ids are assigned sequentially, starting at 1, in dict order.
        tasks = [
            {
                "true_task_type": task_type,
                "last_updated": datetime.now(),
                "task_id": task_id,
                "state": "successful",
                "orig_inputs": {
                    "incar": input_set.incar.as_dict(),
                    "kpoints": input_set.kpoints.as_dict(),
                },
                "output": {
                    "structure": structure.as_dict()
                },
            }
            for task_id, (task_type, input_set)
            in enumerate(input_sets.items(), start=1)
        ]

        self.test_tasks = MemoryStore("tasks")
        self.task_types = MemoryStore("task_types")
        for store in (self.test_tasks, self.task_types):
            store.connect()

        self.test_tasks.update(tasks)
示例#9
0
def extract(entry):
    """Flatten a database entry into two ordered records.

    Returns:
        A 2-tuple of OrderedDicts: a summary record (identifiers,
        composition, energies, space-group data and structure statistics)
        and an ``{'id', 'structure'}`` record carrying the structure
        serialized with ``Structure.as_dict()``.
    """
    src_struct = entry.structure
    sg = entry.spacegroup
    pmg_structure = Structure(src_struct.cell,
                              src_struct.site_compositions,
                              src_struct.site_coords)

    summary = OrderedDict()
    summary['id'] = entry.id
    summary['label'] = entry.label
    summary['proto_label'] = entry.proto_label
    summary['formula'] = entry.name
    summary['composition'] = entry.spec_comp
    summary['reduced_comp'] = entry.red_comp
    summary['unit_comp'] = entry.unit_comp
    summary['total_energy_pa'] = entry.total_energy
    summary['formation_energy_pa'] = entry.energy
    summary['experiment'] = entry.composition.experiment
    summary['mass'] = entry.mass
    summary['stable'] = entry.stable
    summary['e_above_hull'] = entry.formationenergy_set.first().stability
    summary['is_ordered'] = pmg_structure.is_ordered
    summary['band_gap'] = entry.band_gap
    summary['spacegroup'] = sg.hm
    summary['spacegroup_hall'] = sg.hall
    summary['spacegroup_id'] = sg.number
    summary['spacegroup_schoenflies'] = sg.schoenflies
    summary['is_centro_symmetric'] = sg.centrosymmetric
    summary['natoms'] = src_struct.natoms
    summary['ntypes'] = src_struct.ntypes
    summary['nsites'] = src_struct.nsites
    # Not exported: stresses, forces, magmom, magmom_pa.
    summary['volume'] = src_struct.volume
    summary['volume_pa'] = src_struct.volume_pa

    structure_record = OrderedDict()
    structure_record['id'] = entry.id
    structure_record['structure'] = pmg_structure.as_dict()

    return summary, structure_record
示例#10
0
    def read_cfgs(self, filename, predict=False):
        """
        Read configurations from a file whose blocks each start with a line
        holding the number of atoms.

        Every ``AT `` marker is stripped from the text first. Each block must
        provide ``Lattice="..."``, an ``energy=`` (or ``dft_energy=``) value,
        a ``dft_virial=`` entry and a ``properties=`` specification of
        ``name:type:ncols`` triples that describes the per-atom columns.

        Args:
            filename (str): The configuration file to be read.
            predict (bool): If True, forces are read from the ``force``
                property; otherwise from ``dft_force``.

        Returns:
            A 2-tuple: the list of per-configuration dicts (keys 'outputs',
            'structure' and 'num_atoms'; 'outputs' holds 'energy', 'forces'
            and 'virial_stress'), and the second item returned by
            ``convert_docs`` for that list.

        Raises:
            IndexError: if a block lacks one of the required entries.
            AssertionError: if the atom count in a block header differs from
                the number of sites parsed.
        """
        # Builtin int/str replace the np.int/np.str aliases, which were
        # removed in NumPy 1.24.
        type_convert = {'R': np.float32, 'I': int, 'S': str}
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()
        lines = re.compile('AT ').sub('', string=lines)

        block_pattern = re.compile(
            '(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
        lattice_pattern = re.compile('Lattice="(.+)"')
        # NOTE(review): the '.' between the digit groups is unescaped and so
        # matches any character; kept as-is to preserve matching behaviour.
        energy_pattern = re.compile(
            r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
        # Raw strings avoid invalid-escape warnings for '\S'.
        stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
        properties_pattern = re.compile(r'properties=(\S+)', re.I)
        position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

        for (size, block) in block_pattern.findall(lines):
            d = {'outputs': {}}
            size = int(size)
            lattice_str = lattice_pattern.findall(block)[0]
            lattice = Lattice([float(s) for s in lattice_str.split()])
            # The last match is used when several energy entries are present.
            energy = float(energy_pattern.findall(block)[-1])
            # Group 1 of the match is the text between the optional braces.
            stress_str = stress_pattern.findall(block)[0][1]
            virial_stress = np.array([float(s) for s in stress_str.split()])
            # Keep six of the nine values; presumably xx, yy, zz, xy, yz, xz
            # of a row-major 3x3 tensor -- TODO confirm.
            virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

            # properties = name:type:ncols:name:type:ncols:...
            properties = properties_pattern.findall(block)[0].split(":")
            labels_columns = OrderedDict()
            for i in range(0, len(properties), 3):
                labels_columns[properties[i]] = [
                    int(properties[i + 2]), properties[i + 1]
                ]

            position_str = position_pattern.findall(block)[0].split('\n')
            position = np.array([p.split() for p in position_str])
            # Slice the per-atom table into one typed array per property.
            labels = {}
            column_index = 0
            for key, (num_columns, dtype) in labels_columns.items():
                labels[key] = position[:, column_index:column_index +
                                       num_columns].astype(type_convert[dtype])
                column_index += num_columns

            struct = Structure(lattice=lattice,
                               species=labels['species'].ravel(),
                               coords=labels['pos'],
                               coords_are_cartesian=True)
            if predict:
                forces = labels['force']
            else:
                forces = labels['dft_force']
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            assert size == struct.num_sites
            d['num_atoms'] = size
            d['outputs']['forces'] = forces
            d['outputs']['virial_stress'] = virial_stress

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df
def to_istructure(structure: Structure) -> IStructure:
    """Return an ``IStructure`` built from the dict form of *structure*."""
    structure_dict = structure.as_dict()
    return IStructure.from_dict(structure_dict)
from pymatgen import Lattice, Structure, Molecule
from json import dump, load
from pymatgen.io import xyz
# Script: build a 14-site Si structure, round-trip it through a JSON file
# and write the same sites to an .xyz file.

# The 8 corners and 6 face centres of a unit cube.
coords = [[0, 0, 0], [1, 0, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1], [1, 1, 0],
          [0, 0, 1], [1, 1, 1], [0.5, 0, 0.5], [0.5, 1, 0.5], [0, 0.5, 0.5],
          [1, 0.5, 0.5], [0.5, 0.5, 0], [0.5, 0.5, 1]]
lattice = Lattice.from_parameters(a=1, b=1, c=1, alpha=90, beta=90, gamma=90)
# One "Si" per coordinate; the count is derived from coords rather than
# hard-coded so the two lists cannot drift apart.
struct = Structure(lattice, ["Si"] * len(coords), coords)
# A Molecule (not a Structure) is needed to write the data to an .xyz file
# using pymatgen.io.xyz.
Si_FCC = Molecule(["Si"] * len(coords), coords)
# The ``with`` statement closes the file on exit, so no explicit close()
# call is needed.
with open('structure.json', 'w') as f:
    dump(struct.as_dict(), f)
with open('structure.json', 'r') as f:
    d = load(f)
    structure = Structure.from_dict(d)
# xyz object
struct_xyz = xyz.XYZ(Si_FCC)
struct_xyz.write_file('structure.xyz')
# To read this file back, use:
# h = xyz.XYZ.from_file('structure.xyz')
# h.all_molecules