Example #1
    def read(self, verbose=False):
        """
        Wrapper around a "_read_data()" routine which is to be written program-
        specific. The routine "_read_data()" shall return a dictionary holding
        sub-dictionaries (for each point) with all infomation in it.

        to an HDF-5 database at node '/raw_data/<self._prefix>'. Note that
        hyphens ("-") will be replaced by underscores ("_") to maintain the
        "natural naming" feature provided by pytables.

        Parameters
        ----------
        ''verbose''
            Boolean, optional (default = False)
            Print some additional information on the data (which jobs are
            pending and converged, respectively) to stdout.

        Returns
        -------
        Dataframe with the respective raw data
        """

        print('Reading data from:\n\t{}'.format(self.base_dir))
        print('Be patient...')

        data = self._read_data(base_dir=self.base_dir, verbose=verbose)

        print('Read {} points in total'.format(len(data.keys())))

        df = self.create_dataframe(data)
        update_hdf_node(df,
                        '/raw_data/{}/'.format(self._prefix.replace('-', '_')),
                        self.store)

        return df
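
The helper `update_hdf_node()` used throughout these examples is not shown.
A minimal sketch, assuming it simply creates or overwrites the node in an
open pandas `HDFStore`:

    def update_hdf_node(df, node, store):
        # Sketch under assumptions, not the project's actual implementation:
        # HDFStore.put() creates the node or replaces an existing one.
        store.put(node, df)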
Example #2
    def write_stress_hdf5(self, task, info='', verbose=False):
        """
        Routine that wraps reading stress data and writing it to an HDF5
        database. The target node is determined by `get_inode()`.

        Arguments
        ---------
        ''task''
            string
            Task to be analyzed; it is normalized via `_normalize_task()`.

        ''info''
            string, optional (default = '')
            Additional node information, for instance a pseudopotential flag
            to keep datasets apart when sharing a database. It is inserted
            after `raw_data` in the node path.

        ''verbose''
            Boolean, optional (default = False)
            Print the data frame content to stdout.

        Returns
        -------
        None
        """
        observable = 'stress'

        task = self._normalize_task(task)
        data = self.read_stress(task)
        df = self.create_array_dataframe(data=data,
                                         task=task,
                                         verbose=verbose,
                                         observable=observable)
        node = self.get_inode(task, observable=observable, info=info)
        update_hdf_node(df, node, self.store)
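
`get_inode()` is not shown either. Based on the docstring ('info' goes after
`raw_data`) and the hyphen-to-underscore note in Example #1, a plausible
sketch:

    def get_inode(self, task, observable, info=''):
        # Hypothetical reconstruction of the node path:
        # '/raw_data[/<info>]/<observable>/<task>', with hyphens replaced
        # by underscores to keep PyTables natural naming.
        parts = ['raw_data'] + ([info] if info else []) + [observable, task]
        return '/' + '/'.join(p.replace('-', '_') for p in parts)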
Example #3
    def read(self, base_dir=None, node='PES', verbose=False,
             process_resultfolder=None):
        """
        Wrapper around a "_read_data()" routine which is to be written program-
        specific. The routine "_read_data()" shall return a dictionary holding
        sub-dictionaries (for each point) with all infomation in it.

        to an HDF-5 database at node '/raw_data/PES'.

        Parameters
        ----------
        ''base_dir''
            string, optional (default = <self.base_dir>)
            Path to the base directory. Defaults to the standard <base_dir> but
            can be changed if you want to read e.g. a testset.

        ''node''
            string, optional (default = 'PES')
            Node name for the HDF5 database.

        ''verbose''
            Boolean, optional (default = False)
            Print some additional information on the data (which jobs are
            pending and converged, respectively) to stdout.

        ''process_resultfolder''
            optional (default = None)
            Forwarded verbatim to "_read_data()".

        Returns
        -------
        Dataframe with the respective raw data
        """

        if base_dir is None:
            base_dir = self.base_dir

        data = self._read_data(base_dir=base_dir,
                               process_resultfolder=process_resultfolder)

        df = self.create_dataframe(data)

        finished = df[df['converged']]
        pending = df[~df['converged']]

        njobs = len(df)
        nfinished = len(finished)
        npending = len(pending)

        if verbose:
            print(self._lim)
            print('*** Finished jobs ({} / {}) ***'.format(nfinished, njobs))
            print(self._lim)
            print(finished)
            print(self._lim)
            print('*** Pending jobs ({} / {}) ***'.format(npending, njobs))
            print(self._lim)
            print(pending)
        print(self._lim)
        print('*** Finished {} of {} jobs ***'.format(nfinished, njobs))
        print(self._lim)

        update_hdf_node(df, '/raw_data/{}'.format(node), self.store)

        return df
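
A hypothetical call, assuming `reader` is an instance of this class and a
test set lives outside the default base directory:

    df = reader.read(base_dir='/path/to/testset',  # hypothetical path
                     node='PES_testset',
                     verbose=True)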
Example #4
    def write_energy_hdf5(self,
                          task,
                          info='',
                          dump_to_txt=False,
                          verbose=False):
        """
        Routine that wraps reading energy data and writing it to an HDF5
        database. The target node is determined by `get_inode()`.

        Arguments
        ---------
        ''task''
            string
            Task to be analyzed; it is normalized via `_normalize_task()`.

        ''info''
            string, optional (default = '')
            Additional node information, for instance a pseudopotential flag
            to keep datasets apart when sharing a database. It is inserted
            after `raw_data` in the node path.

        ''dump_to_txt''
            Boolean, optional (default = False)
            Dump the content of the pandas data frame to a plain-text file.

        ''verbose''
            Boolean, optional (default = False)
            Print the data frame content to stdout.

        Returns
        -------
        None
        """
        observable = 'energy'

        task = self._normalize_task(task)
        data = self.read_energy(task)
        df = self.create_dataframe(data=data,
                                   task=task,
                                   verbose=verbose,
                                   observable=observable)
        node = self.get_inode(task, observable=observable, info=info)
        update_hdf_node(df, node, self.store)

        if dump_to_txt:
            filename = self.seed + '__' + observable + '_' + task + '-variation.dat'
            filename = os.path.join(self.base_dir, filename)
            print('Dumping to plain-text file:\n\t{}'.format(filename))
            with open(filename, 'w') as f:
                f.write('# {}'.format(f.name))
                f.write('\n# file written on: {}'.format(time.strftime('%c')))
                df_str = df.to_string().split('\n')

                # Comment out the two header lines produced by to_string().
                # Replacing characters instead of only prepending keeps the
                # column alignment of the data lines intact.
                df_str[0] = '#' + df_str[0][1:]
                df_str[1] = '# ' + df_str[1][:-2]

                for line in df_str:
                    f.write('\n' + line)
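
Since every non-data line in the dump starts with '#', the file can be read
back with numpy, assuming all dataframe columns are numeric:

    import numpy as np

    # np.loadtxt() skips lines starting with '#' by default.
    values = np.loadtxt(filename)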
Example #5
    def analyze_database(self, node='PES'):
        """
        Normalize the energies in the database (shift them so that the
        minimum is zero) and store the result in '/analysis/<node>'.

        Parameters
        ----------
        ''node''
            string, optional (default = 'PES')
            Node name for the HDF5 database. Note that this has to be the same
            as for the raw data.

        Returns
        -------
        Dataframe
        """
        df = self.store['/raw_data/{}'.format(node)]
        df['energy_normalized'] = df['energy'] - np.min(df['energy'])
        update_hdf_node(df, '/analysis/{}'.format(node), self.store)

        return df
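
A quick sanity check, assuming `analyzer` is an instance with a populated
store:

    df = analyzer.analyze_database(node='PES')
    # By construction the smallest normalized energy is exactly zero.
    assert df['energy_normalized'].min() == 0.0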
Example #6
    def read(self, points, atoms_idx, atoms_names, verbose=False):
        """
        Loop over all points and read the electronic density of the clean
        surface at these points. Returns a dataframe with the sampled values.
        """

        # the clean-surface cube file location is fixed, so hard-code it here
        cs_cubefile = os.path.join(
            self.base_dir, 'clean_surface', 'cube_files',
            self.seed + '_CLEAN_SURFACE' + '-chargeden.cube.gz')

        if not os.path.exists(cs_cubefile):
            self.calc_cs_cube(verbose=verbose)

        cs_cube = self._interpolate_cs_cube(cs_cubefile)

        data = {}

        for point in points:

            point_str = self._point_to_string(point)
            point_dict = self._point_to_dict(point)

            positions = self.get_atoms(point).get_positions()
            for idx, name in zip(atoms_idx, atoms_names):
                pos = positions[idx]

                point_dict['rho_iaa_{}'.format(name)] = cs_cube(pos)

            data[point_str] = point_dict

        df = self.create_dataframe(data)
        update_hdf_node(df,
                        '/raw_data/{}/'.format(self._prefix.replace('-', '_')),
                        self.store)

        return df
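
A hypothetical call, sampling the clean-surface density at two adsorbate
atoms (indices and names are illustrative):

    df = reader.read(points=points,  # iterable of points, as used above
                     atoms_idx=[0, 1],
                     atoms_names=['C', 'O'])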