Example #1
def force_reset(self):
    self._fileindex = PyFileIndex(path=self._project, filter_function=filter_function)
    df = pandas.DataFrame(self.init_table(fileindex=self._fileindex.dataframe))
    if len(df) != 0:
        self._job_table = df[np.array(self._columns)]
    else:
        self._job_table = pandas.DataFrame({k: [] for k in self._columns})
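A minimal sketch of the PyFileIndex pattern this snippet builds on: index a directory, then keep only the files (not directories) from the resulting dataframe. The filter_function below is an assumption for illustration; the snippet above imports it from elsewhere in its module.

from pyfileindex import PyFileIndex

def filter_function(file_name):
    # assumed filter: only index HDF5 files
    return file_name.endswith(".h5")

fi = PyFileIndex(path=".", filter_function=filter_function)
files_df = fi.dataframe[~fi.dataframe.is_directory]
print(files_df.path.values)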
Example #2
@classmethod
def setUpClass(cls):
    cls.fi_with_filter = PyFileIndex(path=".",
                                     filter_function=filter_function)
    cls.fi_without_filter = PyFileIndex(path=".")
    cls.fi_debug = PyFileIndex(path=".",
                               filter_function=filter_function,
                               debug=True)
    cls.sleep_period = 5
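A hedged sketch of how such a fixture is typically exercised: create a file, call update() on the index (the same method used in the examples below), and check that the new file appears. The test and file names are illustrative.

import os
import unittest
from pyfileindex import PyFileIndex

def filter_function(file_name):
    return file_name.endswith(".h5")

class TestPyFileIndexUpdate(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.fi_with_filter = PyFileIndex(path=".", filter_function=filter_function)

    def test_update_sees_new_file(self):
        with open("new_job.h5", "w"):
            pass
        self.fi_with_filter.update()
        self.assertTrue(any(p.endswith("new_job.h5")
                            for p in self.fi_with_filter.dataframe.path.values))
        os.remove("new_job.h5")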
Example #3
def __init__(self,
             working_directory=".",
             job_class=None,
             cores=1,
             pysqa_config=None):
    self.working_directory = os.path.abspath(working_directory)
    if sys.version_info[0] >= 3:
        os.makedirs(self.working_directory, exist_ok=True)
    else:
        if not os.path.exists(self.working_directory):
            os.makedirs(self.working_directory)
    self._fileindex = PyFileIndex(path=self.working_directory,
                                  filter_function=filter_function)
    self._job_class = job_class
    self._results_df = None
    self._broken_jobs = []
    self._cores = cores
    self._job_name_function = None
    self.job = SciSweeperJob
    self._pysqa = None
    self.pysqa = pysqa_config
    self._job_id_lst = []
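The assignment self.pysqa = pysqa_config above goes through a property setter (shown in full in Example #5) that wraps a plain string in a pysqa QueueAdapter. A minimal sketch of that branch in isolation, assuming pysqa is installed and "~/.queues" points at a queue configuration directory:

from pysqa import QueueAdapter

pysqa_config = "~/.queues"  # illustrative configuration directory
if isinstance(pysqa_config, str):
    pysqa_config = QueueAdapter(pysqa_config)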
Example #4
def copy_files_to_archive(directory_to_transfer,
                          archive_directory,
                          compressed=True):
    if archive_directory[-7:] == ".tar.gz":
        archive_directory = archive_directory[:-7]
        # a ".tar.gz" target name implies compression
        compressed = True

    # strip a trailing slash and reduce the path to its basename; the index
    # below is therefore built relative to the current working directory
    if directory_to_transfer[-1] != "/":
        directory_to_transfer = os.path.basename(directory_to_transfer)
    else:
        directory_to_transfer = os.path.basename(directory_to_transfer[:-1])
    # print("directory to transfer: "+directory_to_transfer)
    pfi = PyFileIndex(path=directory_to_transfer,
                      filter_function=filter_function)
    df_files = pfi.dataframe[~pfi.dataframe.is_directory]

    # Create directories
    dir_lst = generate_list_of_directories(
        df_files=df_files,
        directory_to_transfer=directory_to_transfer,
        archive_directory=archive_directory,
    )
    for d in dir_lst:
        os.makedirs(d, exist_ok=True)
    # Copy files
    dir_name_transfer = getdir(path=directory_to_transfer)
    for f in df_files.path.values:
        copyfile(
            f,
            os.path.join(
                archive_directory,
                dir_name_transfer,
                os.path.relpath(f, directory_to_transfer),
            ),
        )
    if compressed:
        compress_dir(archive_directory)
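compress_dir() is imported from elsewhere in the original module. Below is a minimal stand-in, under the assumption that it packs the archive directory into a .tar.gz next to it (a sketch, not the library's implementation):

import os
import tarfile

def compress_dir(archive_directory):
    # pack <archive_directory> into <archive_directory>.tar.gz
    with tarfile.open(archive_directory + ".tar.gz", "w:gz") as tar:
        tar.add(archive_directory, arcname=os.path.basename(archive_directory))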
Example #5
class SciSweeper(object):
    def __init__(self,
                 working_directory=".",
                 job_class=None,
                 cores=1,
                 pysqa_config=None):
        self.working_directory = os.path.abspath(working_directory)
        if sys.version_info[0] >= 3:
            os.makedirs(self.working_directory, exist_ok=True)
        else:
            if not os.path.exists(self.working_directory):
                os.makedirs(self.working_directory)
        self._fileindex = PyFileIndex(path=self.working_directory,
                                      filter_function=filter_function)
        self._job_class = job_class
        self._results_df = None
        self._broken_jobs = []
        self._cores = cores
        self._job_name_function = None
        self.job = SciSweeperJob
        self._pysqa = None
        self.pysqa = pysqa_config
        self._job_id_lst = []

    @property
    def pysqa(self):
        return self._pysqa

    @pysqa.setter
    def pysqa(self, pysqa_config):
        if isinstance(pysqa_config, str):
            self._pysqa = QueueAdapter(pysqa_config)
        else:
            self._pysqa = pysqa_config

    @property
    def cores(self):
        return self._cores

    @cores.setter
    def cores(self, cores):
        self._cores = cores

    @property
    def job_name_function(self):
        return self._job_name_function

    @job_name_function.setter
    def job_name_function(self, job_name_function):
        self._job_name_function = job_name_function

    @property
    def job_class(self):
        return self._job_class

    @job_class.setter
    def job_class(self, job_class):
        self._job_class = job_class

    @property
    def results(self):
        return self._results_df

    @property
    def broken_jobs(self):
        return self._broken_jobs

    def collect(self):
        """
        Check status of the calculations and update the results table.
        """
        self._fileindex.update()
        dict_lst, broken_jobs = self._check_jobs()
        self._results_df = pandas.DataFrame(dict_lst)
        self._broken_jobs = np.array([
            self._fileindex.dataframe[
                (~self._fileindex.dataframe.is_directory)
                & self._fileindex.dataframe.path.str.contains("/" + s + "/")
            ].dirname.values
            for s in broken_jobs
        ]).flatten().tolist()

    def delete_jobs_from_queue(self):
        """
        Delete jobs from the queuing system
        """
        if self._pysqa is not None:
            _ = [
                self.pysqa.delete_job(process_id=j[0])
                for j in self._job_id_lst
            ]

    def get_job_status(self):
        """
        Get job status from the queuing system

        Returns:
            pandas.DataFrame/ None: Status table
        """
        if self._pysqa is not None:
            status_lst = self.pysqa.get_status_of_jobs(
                process_id_lst=[j[0] for j in self._job_id_lst])
            return pandas.DataFrame([{
                "queue_id": j[0],
                "job_name": j[1],
                "status": s
            } for s, j in zip(status_lst, self._job_id_lst)])

    def run_jobs_in_parallel(self,
                             input_dict_lst,
                             cores=None,
                             job_name_function=None):
        """
        Execute multiple SciSweeperJobs in parallel using multiprocessing.pool.ThreadPool

        Args:
            input_dict_lst (list): List of dictionaries with input parameters
            cores (int/ None): number of cores to use, i.e. the number of parallel threads.
            job_name_function (function/ None): Function which takes the input_dict and a counter as input to return the
                                                job_name as string. This can be defined by the user to have recognizable
                                                job names.
        """
        if cores is None:
            cores = self._cores
        if job_name_function is None:
            job_name_function = self.job_name_function
        if self._pysqa is None:
            tp = ThreadPool(cores)
        else:
            tp = None
        for counter, input_dict in enumerate(tqdm(input_dict_lst)):
            if job_name_function is not None:
                job_name = job_name_function(input_dict=input_dict,
                                             counter=counter)
                working_directory = os.path.abspath(
                    os.path.join(self.working_directory, job_name))
            else:
                working_directory = os.path.abspath(
                    os.path.join(self.working_directory,
                                 "job_" + str(counter)))
            if self._pysqa is None:
                tp.apply_async(run_parallel,
                               (self, working_directory, input_dict))
            else:
                self._job_id_lst.append([
                    self.job_class(
                        working_directory=working_directory,
                        input_dict=input_dict,
                        pysqa_config=self.pysqa,
                        cores=cores,
                    ).run(),
                    os.path.basename(working_directory),
                ])
        if self._pysqa is None:
            tp.close()
            tp.join()

    def run_job(self, job_working_directory, input_dict):
        """
        Run individual calculation.

        Args:
            job_working_directory (str): path to working directory
            input_dict (dict): dictionary with input parameters

        Returns:
            int/ None: If the job is submitted to a queuing system the queue id is returned, else it is None.
        """
        return self._job_class(
            working_directory=job_working_directory,
            input_dict=input_dict,
            pysqa_config=self.pysqa,
        ).run()

    def run_collect_output(self):
        """
        For each job in this directory and all subdirectories, collect the output again. Use this function after
        updating the collect_output function.
        """
        for path in tqdm(
                self._fileindex.dataframe[
                    ~self._fileindex.dataframe.is_directory].dirname.values):
            self._job_class(working_directory=path).run_collect_output()
        self.collect()

    def _check_jobs(self):
        """
        Internal helper function to check the jobs and build the results table.
        """
        dict_lst, all_keys_lst, broken_jobs = [], [], []
        for path in tqdm(
                self._fileindex.dataframe[
                    ~self._fileindex.dataframe.is_directory].dirname.values):
            job_dict = {}
            job_dict["dir"] = os.path.basename(path)
            job = self._job_class(working_directory=path)
            job.from_hdf()
            for k, v in job.input_dict.items():
                job_dict[k] = v
            for k, v in job.output_dict.items():
                job_dict[k] = v
            for k in job_dict.keys():
                all_keys_lst.append(k)
            dict_lst.append(job_dict)
        final_keys = list(set(all_keys_lst))
        for d in dict_lst:
            broken_flag = False
            for k in final_keys:
                if k not in d.keys():
                    d[k] = np.nan
                    broken_flag = True
            if broken_flag:
                # record each job with missing keys once
                broken_jobs.append(d["dir"])
        return dict_lst, broken_jobs
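The job_name_function hook documented in run_jobs_in_parallel() receives the input_dict and a counter and returns the job name as a string. A minimal function matching that contract (the input key "x" is illustrative):

def job_name_function(input_dict, counter):
    # e.g. {"x": 1}, {"x": 2} become job_x_1 and job_x_2
    return "job_x_" + str(input_dict["x"])

# assuming a SciSweeper instance as defined above:
# sweeper.job_name_function = job_name_function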
Example #6
class FileTable(with_metaclass(Singleton)):
    def __init__(self, project):
        self._fileindex = None
        self._job_table = None
        self._project = os.path.abspath(project)
        self._columns = ['id', 'status', 'chemicalformula', 'job', 'subjob', 'projectpath', 'project', 'timestart',
                         'timestop', 'totalcputime', 'computer', 'hamilton', 'hamversion', 'parentid', 'masterid',
                         'username']
        self.force_reset()

    def force_reset(self):
        self._fileindex = PyFileIndex(path=self._project, filter_function=filter_function)
        df = pandas.DataFrame(self.init_table(fileindex=self._fileindex.dataframe))
        if len(df) != 0:
            self._job_table = df[np.array(self._columns)]
        else:
            self._job_table = pandas.DataFrame({k: [] for k in self._columns})

    def init_table(self, fileindex, working_dir_lst=None):
        if working_dir_lst is None:
            working_dir_lst = []
        fileindex = fileindex[~fileindex.is_directory]
        fileindex = fileindex.iloc[fileindex.path.values.argsort()]
        job_lst = []
        for path, mtime in zip(fileindex.path, fileindex.mtime):
            job_dict = self.get_extract(path, mtime)
            job_dict['id'] = len(working_dir_lst) + 1
            working_dir_lst.append(job_dict['project'][:-1] + job_dict['subjob'] + '_hdf5/')
            if job_dict['project'] in working_dir_lst:
                job_dict['masterid'] = working_dir_lst.index(job_dict['project']) + 1
            else:
                job_dict['masterid'] = None
            job_lst.append(job_dict)
        return job_lst

    @staticmethod
    def get_extract(path, mtime):
        basename = os.path.basename(path)
        job = os.path.splitext(basename)[0]
        time = datetime.datetime.fromtimestamp(mtime)
        return {'status': get_job_status_from_file(hdf5_file=path, job_name=job),
                'chemicalformula': None,
                'job': job,
                'subjob': '/' + job,
                'projectpath': None,
                'project': os.path.dirname(path) + '/',
                'timestart': time,
                'timestop': time,
                'totalcputime': 0.0,
                'computer': None,
                'username': None,
                'parentid': None,
                'hamilton': get_hamilton_from_file(hdf5_file=path, job_name=job),
                'hamversion': get_hamilton_version_from_file(hdf5_file=path, job_name=job)}

    def add_item_dict(self, par_dict):
        par_dict = dict((key.lower(), value) for key, value in par_dict.items())
        if len(self._job_table) != 0:
            job_id = np.max(self._job_table.id.values) + 1
        else:
            job_id = 1
        default_values = {'id': job_id,
                          'status': 'initialized',
                          'chemicalformula': None,
                          'timestart': datetime.datetime.now(),
                          'computer': None,
                          'parentid': None,
                          'username': None,
                          'timestop': None,
                          'totalcputime': None,
                          'masterid': None}
        for k, v in default_values.items():
            if k not in par_dict.keys():
                par_dict[k] = v
        self._job_table = pandas.concat([self._job_table,
                                         pandas.DataFrame([par_dict])[self._columns]]).reset_index(drop=True)
        return int(par_dict['id'])

    def item_update(self, par_dict, item_id):
        if isinstance(item_id, list):
            item_id = item_id[0]
        if isinstance(item_id, str):
            item_id = float(item_id)
        for k, v in par_dict.items():
            self._job_table.loc[self._job_table.id == int(item_id), k] = v

    def delete_item(self, item_id):
        item_id = int(item_id)
        if item_id in [int(v) for v in self._job_table.id.values]:
            self._job_table = self._job_table[self._job_table.id != item_id].reset_index(drop=True)
        else:
            raise ValueError

    def get_item_by_id(self, item_id):
        item_id = int(item_id)
        return {k: list(v.values())[0] for k, v in self._job_table[self._job_table.id == item_id].to_dict().items()}

    def get_items_dict(self, item_dict, return_all_columns=True):
        df = self._job_table
        if not isinstance(item_dict, dict):
            raise TypeError
        for k, v in item_dict.items():
            if k in ['id', 'parentid', 'masterid']:
                df = df[df[k] == int(v)]
            elif "%" not in str(v):
                df = df[df[k] == v]
            else:
                df = df[df[k].str.contains(v.replace('%', ''))]
        df_dict = df.to_dict()
        if return_all_columns:
            return [{k: v[i] for k, v in df_dict.items()} for i in df_dict['id'].keys()]
        else:
            return [{'id': i} for i in df_dict['id'].values()]

    def update(self):
        self._fileindex.update()
        if len(self._job_table) != 0:
            files_lst, working_dir_lst = zip(*[[project + subjob[1:] + '.h5', project + subjob[1:] + '_hdf5']
                                               for project, subjob in zip(self._job_table.project.values,
                                                                          self._job_table.subjob.values)])
            df_new = self._fileindex.dataframe[
                ~self._fileindex.dataframe.is_directory & ~self._fileindex.dataframe.path.isin(files_lst)]
        else:
            files_lst, working_dir_lst = [], []
            df_new = self._fileindex.dataframe[~self._fileindex.dataframe.is_directory]
        if len(df_new) > 0:
            job_lst = self.init_table(fileindex=df_new, working_dir_lst=list(working_dir_lst))
            df = pandas.DataFrame(job_lst)[self._columns]
            if len(files_lst) != 0 and len(working_dir_lst) != 0:
                self._job_table = pandas.concat([self._job_table, df]).reset_index(drop=True)
            else:
                self._job_table = df

    def get_db_columns(self):
        return self.get_table_headings()

    def get_table_headings(self):
        return self._job_table.columns.values

    def job_table(self, project=None, recursive=True, columns=None, all_columns=False, sort_by="id", max_colwidth=200,
                  job_name_contains=''):
        if project is None:
            project = self._project
        if columns is None:
            columns = ["job", "project", "chemicalformula"]
        if all_columns:
            columns = self._columns
        if len(self._job_table) != 0:
            if recursive:
                df = self._job_table[self._job_table.project.str.contains(project)]
            else:
                df = self._job_table[self._job_table.project == project]
        else:
            df = self._job_table
        pandas.set_option("display.max_colwidth", max_colwidth)
        if len(df) == 0:
            return df
        if job_name_contains != '':
            df = df[df.job.str.contains(job_name_contains)]
        if sort_by in columns:
            return df[columns].sort_values(by=sort_by)
        return df[columns]

    def get_jobs(self, project=None, recursive=True, columns=None):
        if project is None:
            project = self._project
        if columns is None:
            columns = ["id", "project"]
        df = self.job_table(project=project, recursive=recursive, columns=columns)
        if len(df) == 0:
            return {key: list() for key in columns}
        dictionary = {}
        for key in df.keys():
            # pandas Series.to_list() is an alias of tolist(); the two are equivalent
            dictionary[key] = df[key].tolist()
        return dictionary

    def get_job_ids(self, project=None, recursive=True):
        return self.get_jobs(project=project, recursive=recursive, columns=['id'])["id"]

    def get_job_id(self, job_specifier, project=None):
        if project is None:
            project = self._project
        if isinstance(job_specifier, (int, np.integer)):
            return job_specifier  # is id

        # str.replace returns a new string, so the result must be assigned
        job_specifier = job_specifier.replace(".", "_")
        job_id_lst = self._job_table[
            (self._job_table.project == project) & (self._job_table.job == job_specifier)].id.values
        if len(job_id_lst) == 0:
            job_id_lst = self._job_table[
                self._job_table.project.str.contains(project) & (self._job_table.job == job_specifier)].id.values
        if len(job_id_lst) == 0:
            return None
        elif len(job_id_lst) == 1:
            return int(job_id_lst[0])
        else:
            raise ValueError(
                "job name '{0}' in this project is not unique".format(job_specifier)
            )

    def get_child_ids(self, job_specifier, project=None, status=None):
        """
        Get the childs for a specific job

        Args:
            database (DatabaseAccess): Database object
            sql_query (str): SQL query to enter a more specific request
            user (str): username of the user whoes user space should be searched
            project_path (str): root_path - this is in contrast to the project_path in GenericPath
            job_specifier (str): name of the master job or the master jobs job ID
            status (str): filter childs which match a specific status - None by default

        Returns:
            list: list of child IDs
        """
        if project is None:
            project = self._project
        id_master = self.get_job_id(project=project, job_specifier=job_specifier)
        if id_master is None:
            return []
        else:
            if status is not None:
                id_lst = self._job_table[
                    (self._job_table.masterid == id_master) & (self._job_table.status == status)].id.values
            else:
                id_lst = self._job_table[(self._job_table.masterid == id_master)].id.values
            return sorted(id_lst)

    def set_job_status(self, job_specifier, status, project=None):
        """
        Set the status of a particular job

        Args:
            database (DatabaseAccess): Database object
            sql_query (str): SQL query to enter a more specific request
            user (str): username of the user whoes user space should be searched
            project_path (str): root_path - this is in contrast to the project_path in GenericPath
            job_specifier (str): name of the job or job ID
            status (str): job status can be one of the following ['initialized', 'appended', 'created', 'submitted',
                         'running', 'aborted', 'collect', 'suspended', 'refresh', 'busy', 'finished']

        """
        if project is None:
            project = self._project
        job_id = self.get_job_id(project=project, job_specifier=job_specifier)
        self._job_table.loc[self._job_table.id == job_id, 'status'] = status
        db_entry = self.get_item_by_id(item_id=job_id)
        h5io.write_hdf5(db_entry["project"] + db_entry["subjob"] + '.h5',
                        status,
                        title=db_entry["subjob"][1:] + '/status',
                        overwrite="update")

    def get_job_status(self, job_specifier, project=None):
        """
        Get the status of a particular job

        Args:
            database (DatabaseAccess): Database object
            sql_query (str): SQL query to enter a more specific request
            user (str): username of the user whoes user space should be searched
            project_path (str): root_path - this is in contrast to the project_path in GenericPath
            job_specifier (str): name of the job or job ID

        Returns:
            str: job status can be one of the following ['initialized', 'appended', 'created', 'submitted', 'running',
                 'aborted', 'collect', 'suspended', 'refresh', 'busy', 'finished']
        """
        if project is None:
            project = self._project
        try:
            return self._job_table[
                self._job_table.id == self.get_job_id(project=project, job_specifier=job_specifier)].status.values[0]
        except KeyError:
            return None

    def get_job_working_directory(self, job_specifier, project=None):
        """
        Get the working directory of a particular job

        Args:
            database (DatabaseAccess): Database object
            sql_query (str): SQL query to enter a more specific request
            user (str): username of the user whoes user space should be searched
            project_path (str): root_path - this is in contrast to the project_path in GenericPath
            job_specifier (str): name of the job or job ID

        Returns:
            str: working directory as absolute path
        """
        if project is None:
            project = self._project
        try:
            db_entry = self.get_item_by_id(item_id=self.get_job_id(project=project, job_specifier=job_specifier))
            if db_entry and len(db_entry) > 0:
                job_name = db_entry["subjob"][1:]
                return os.path.join(
                    db_entry["project"],
                    job_name + "_hdf5",
                    job_name,
                )
            else:
                return None
        except KeyError:
            return None
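A sketch of the path convention this FileTable relies on: a job stored as <project>/<job>.h5 is decomposed by get_extract() into project and subjob, and get_job_working_directory() reassembles <project>/<job>_hdf5/<job>. The example path is illustrative:

import os

path = "/home/user/proj/water.h5"
job = os.path.splitext(os.path.basename(path))[0]  # "water"
project = os.path.dirname(path) + "/"              # "/home/user/proj/"
subjob = "/" + job                                 # "/water"
print(os.path.join(project, job + "_hdf5", job))   # /home/user/proj/water_hdf5/water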
Example #7
class FileTable(IsDatabase, metaclass=Singleton):
    def __init__(self, project):
        self._fileindex = None
        self._job_table = None
        self._project = os.path.abspath(project)
        self._columns = [
            "id",
            "status",
            "chemicalformula",
            "job",
            "subjob",
            "projectpath",
            "project",
            "timestart",
            "timestop",
            "totalcputime",
            "computer",
            "hamilton",
            "hamversion",
            "parentid",
            "masterid",
            "username",
        ]
        self.force_reset()

    def _get_view_mode(self):
        return False

    def force_reset(self):
        self._fileindex = PyFileIndex(path=self._project,
                                      filter_function=filter_function)
        df = pandas.DataFrame(
            self.init_table(fileindex=self._fileindex.dataframe))
        if len(df) != 0:
            df.id = df.id.astype(int)
            self._job_table = df[np.array(self._columns)]
        else:
            self._job_table = pandas.DataFrame({k: [] for k in self._columns})

    def init_table(self, fileindex, working_dir_lst=None):
        if working_dir_lst is None:
            working_dir_lst = []
        fileindex = fileindex[~fileindex.is_directory]
        fileindex = fileindex.iloc[fileindex.path.values.argsort()]
        job_lst = []
        for path, mtime in zip(fileindex.path, fileindex.mtime):
            job_dict = self.get_extract(path, mtime)
            job_dict["id"] = len(working_dir_lst) + 1
            working_dir_lst.append(job_dict["project"][:-1] +
                                   job_dict["subjob"] + "_hdf5/")
            if job_dict["project"] in working_dir_lst:
                job_dict["masterid"] = working_dir_lst.index(
                    job_dict["project"]) + 1
            else:
                job_dict["masterid"] = None
            job_lst.append(job_dict)
        return job_lst

    def add_item_dict(self, par_dict):
        par_dict = dict(
            (key.lower(), value) for key, value in par_dict.items())
        if len(self._job_table) != 0:
            job_id = np.max(self._job_table.id.values) + 1
        else:
            job_id = 1
        default_values = {
            "id": job_id,
            "status": "initialized",
            "chemicalformula": None,
            "timestart": datetime.datetime.now(),
            "computer": None,
            "parentid": None,
            "username": None,
            "timestop": None,
            "totalcputime": None,
            "masterid": None,
        }
        for k, v in default_values.items():
            if k not in par_dict.keys():
                par_dict[k] = v
        self._job_table = pandas.concat([
            self._job_table,
            pandas.DataFrame([par_dict])[self._columns]
        ]).reset_index(drop=True)
        return int(par_dict["id"])

    def item_update(self, par_dict, item_id):
        if isinstance(item_id, list):
            item_id = item_id[0]
        if isinstance(item_id, str):
            item_id = float(item_id)
        for k, v in par_dict.items():
            self._job_table.loc[self._job_table.id == int(item_id), k] = v

    def delete_item(self, item_id):
        item_id = int(item_id)
        if item_id in [int(v) for v in self._job_table.id.values]:
            self._job_table = self._job_table[
                self._job_table.id != item_id].reset_index(drop=True)
        else:
            raise ValueError

    def get_item_by_id(self, item_id):
        item_id = int(item_id)
        return {
            k: list(v.values())[0]
            for k, v in self._job_table[self._job_table.id ==
                                        item_id].to_dict().items()
        }

    def get_items_dict(self, item_dict, return_all_columns=True):
        df = self._job_table
        if not isinstance(item_dict, dict):
            raise TypeError
        for k, v in item_dict.items():
            if k in ["id", "parentid", "masterid"]:
                df = df[df[k] == int(v)]
            elif "%" not in str(v):
                df = df[df[k] == v]
            else:
                df = df[df[k].str.contains(v.replace("%", ""))]
        df_dict = df.to_dict()
        if return_all_columns:
            return [{k: v[i]
                     for k, v in df_dict.items()}
                    for i in df_dict["id"].keys()]
        else:
            return [{"id": i} for i in df_dict["id"].values()]

    def update(self):
        self._job_table.status = [
            self._get_job_status_from_hdf5(job_id)
            for job_id in self._job_table.id.values
        ]
        self._fileindex.update()
        if len(self._job_table) != 0:
            files_lst, working_dir_lst = zip(*[[
                project + subjob[1:] + ".h5", project + subjob[1:] + "_hdf5"
            ] for project, subjob in zip(self._job_table.project.values,
                                         self._job_table.subjob.values)])
            df_new = self._fileindex.dataframe[
                ~self._fileindex.dataframe.is_directory
                & ~self._fileindex.dataframe.path.isin(files_lst)]
        else:
            files_lst, working_dir_lst = [], []
            df_new = self._fileindex.dataframe[
                ~self._fileindex.dataframe.is_directory]
        if len(df_new) > 0:
            job_lst = self.init_table(fileindex=df_new,
                                      working_dir_lst=list(working_dir_lst))
            df = pandas.DataFrame(job_lst)[self._columns]
            if len(files_lst) != 0 and len(working_dir_lst) != 0:
                self._job_table = pandas.concat([self._job_table,
                                                 df]).reset_index(drop=True)
            else:
                self._job_table = df

    def _get_table_headings(self, table_name=None):
        return self._job_table.columns.values

    def _get_job_table(
        self,
        sql_query,
        user,
        project_path=None,
        recursive=True,
        columns=None,
        element_lst=None,
    ):
        self.update()
        if project_path is None:
            project_path = self._project
        if len(self._job_table) != 0:
            if recursive:
                return self._job_table[self._job_table.project.str.contains(
                    project_path)]
            else:
                return self._job_table[self._job_table.project == project_path]
        else:
            return self._job_table

    def get_jobs(self, project=None, recursive=True, columns=None):
        if project is None:
            project = self._project
        if columns is None:
            columns = ["id", "project"]
        df = self.job_table(
            sql_query=None,
            user=None,
            project_path=project,
            recursive=recursive,
            columns=columns,
        )
        if len(df) == 0:
            return {key: list() for key in columns}
        dictionary = {}
        for key in df.keys():
            # pandas Series.to_list() is an alias of tolist(); the two are equivalent
            dictionary[key] = df[key].tolist()
        return dictionary

    def get_job_ids(self, project=None, recursive=True):
        return self.get_jobs(project=project,
                             recursive=recursive,
                             columns=["id"])["id"]

    def get_job_id(self, job_specifier, project=None):
        if project is None:
            project = self._project
        if isinstance(job_specifier, (int, np.integer)):
            return job_specifier  # is id

        # str.replace returns a new string, so the result must be assigned
        job_specifier = job_specifier.replace(".", "_")
        job_id_lst = self._job_table[
            (self._job_table.project == project)
            & (self._job_table.job == job_specifier)].id.values
        if len(job_id_lst) == 0:
            job_id_lst = self._job_table[
                self._job_table.project.str.contains(project)
                & (self._job_table.job == job_specifier)].id.values
        if len(job_id_lst) == 0:
            return None
        elif len(job_id_lst) == 1:
            return int(job_id_lst[0])
        else:
            raise ValueError(
                "job name '{0}' in this project is not unique".format(
                    job_specifier))

    def get_child_ids(self, job_specifier, project=None, status=None):
        """
        Get the childs for a specific job

        Args:
            database (DatabaseAccess): Database object
            sql_query (str): SQL query to enter a more specific request
            user (str): username of the user whoes user space should be searched
            project_path (str): root_path - this is in contrast to the project_path in GenericPath
            job_specifier (str): name of the master job or the master jobs job ID
            status (str): filter childs which match a specific status - None by default

        Returns:
            list: list of child IDs
        """
        if project is None:
            project = self._project
        id_master = self.get_job_id(project=project,
                                    job_specifier=job_specifier)
        if id_master is None:
            return []
        else:
            if status is not None:
                id_lst = self._job_table[
                    (self._job_table.masterid == id_master)
                    & (self._job_table.status == status)].id.values
            else:
                id_lst = self._job_table[(
                    self._job_table.masterid == id_master)].id.values
            return sorted(id_lst)

    def get_job_working_directory(self, job_id):
        """
        Get the working directory of a particular job

        Args:
            job_id (int): job ID

        Returns:
            str: working directory as absolute path
        """
        try:
            db_entry = self.get_item_by_id(job_id)
            if db_entry and len(db_entry) > 0:
                job_name = db_entry["subjob"][1:]
                return os.path.join(
                    db_entry["project"],
                    job_name + "_hdf5",
                    job_name,
                )
            else:
                return None
        except KeyError:
            return None

    def _get_job_status_from_hdf5(self, job_id):
        db_entry = self.get_item_by_id(job_id)
        job_name = db_entry["subjob"][1:]
        return get_job_status_from_file(
            hdf5_file=os.path.join(db_entry["project"], job_name + ".h5"),
            job_name=job_name,
        )

    def get_job_status(self, job_id):
        return self._job_table[self._job_table.id == job_id].status.values[0]

    def set_job_status(self, job_id, status):
        db_entry = self.get_item_by_id(item_id=job_id)
        self._job_table.loc[self._job_table.id == job_id, "status"] = status
        h5io.write_hdf5(
            db_entry["project"] + db_entry["subjob"] + ".h5",
            status,
            title=db_entry["subjob"][1:] + "/status",
            overwrite="update",
        )

    @staticmethod
    def get_extract(path, mtime):
        basename = os.path.basename(path)
        job = os.path.splitext(basename)[0]
        time = datetime.datetime.fromtimestamp(mtime)
        return {
            "status": get_job_status_from_file(hdf5_file=path, job_name=job),
            "chemicalformula": None,
            "job": job,
            "subjob": "/" + job,
            "projectpath": None,
            "project": os.path.dirname(path) + "/",
            "timestart": time,
            "timestop": time,
            "totalcputime": 0.0,
            "computer": None,
            "username": None,
            "parentid": None,
            "hamilton": get_hamilton_from_file(hdf5_file=path, job_name=job),
            "hamversion": get_hamilton_version_from_file(hdf5_file=path, job_name=job),
        }
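The get_items_dict() filter above mimics a SQL LIKE clause: a "%" anywhere in a string value switches from exact match to substring match, with the "%" stripped before matching. A self-contained sketch of that branch on a toy table:

import pandas

df = pandas.DataFrame({"id": [1, 2, 3], "job": ["water_a", "water_b", "ice"]})
v = "water%"
if "%" in str(v):
    df = df[df["job"].str.contains(v.replace("%", ""))]
print(df.id.values)  # [1 2]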