Example #1
    def assimilate(self, old_task):
        """
        Parses VASP runs, inserts the result into the db, and returns the
        task_id or doc of the insertion.

        Returns:
            If in simulate_mode, the entire doc is returned for debugging
            purposes. Else, only the task_id of the inserted doc is returned.
        """

        path = old_task["dir_name"]  # AJ: get dir name from task
        d = self.get_task_doc(path, self.parse_dos, self.additional_fields)
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        if not self.simulate:
            # Perform actual insertion into db. Because db connections cannot
            # be pickled, every insertion needs to create a new connection
            # to the db.
            conn = MongoClient(self.host, self.port)
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A ref
            # to the dos file is in the dos_fs_id.
            result = coll.find_one({"dir_name": d["dir_name"]})
            if result is None or self.update_duplicates:
                if self.parse_dos and "calculations" in d:
                    for calc in d["calculations"]:
                        if "dos" in calc:
                            dos = json.dumps(calc["dos"], cls=MontyEncoder)
                            fs = gridfs.GridFS(db, "dos_fs")
                            dosid = fs.put(dos)
                            calc["dos_fs_id"] = dosid
                            del calc["dos"]

                d["last_updated"] = datetime.datetime.today()
                if result is None:
                    d["task_id"] = "mp-{}".format(old_task["task_id"])  # AJ: old task_id is new
                    logger.info("Inserting {} with taskid = {}".format(d["dir_name"], d["task_id"]))
                elif self.update_duplicates:
                    d["task_id"] = result["task_id"]
                    logger.info("Updating {} with taskid = {}".format(d["dir_name"], d["task_id"]))

                # Fireworks processing
                self.process_fw(old_task, d)
                coll.update({"dir_name": d["dir_name"]}, {"$set": d}, upsert=True)
                return d["task_id"], d
            else:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

        else:
            d["task_id"] = 0
            logger.info("Simulated insert into database for {} with task_id {}".format(d["dir_name"], d["task_id"]))
            return 0, d
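
A note on the GridFS pattern above: the serialized DOS lives outside the task
doc, and only dos_fs_id points at it. A minimal sketch of the reverse lookup,
assuming a db handle and a task doc shaped like the ones above (load_dos_dict
is a hypothetical helper, not part of the original code):

    import json

    import gridfs

    def load_dos_dict(db, task, calc_index=0):
        """Read a DOS back out of the "dos_fs" GridFS bucket."""
        fs = gridfs.GridFS(db, "dos_fs")
        dos_fs_id = task["calculations"][calc_index]["dos_fs_id"]
        # fs.put() stored a JSON string above, so decode it back to a dict
        return json.loads(fs.get(dos_fs_id).read())
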
Example #2
    def run_task(self, fw_spec):

        # write a file containing the formula and task_type for somewhat
        # easier file system browsing
        self._write_formula_file(fw_spec)

        # TODO: make this better - is there a way to load an environment
        # variable as the VASP_EXE?
        if 'nid' in socket.gethostname():  # hopper compute nodes
            # TODO: can base ncores on FW_submit.script
            v_exe = shlex.split('aprun -n 48 vasp')
            gv_exe = shlex.split('aprun -n 48 gvasp')
            print 'running on HOPPER'
        elif 'c' in socket.gethostname():  # mendel compute nodes
            # TODO: can base ncores on FW_submit.script
            v_exe = shlex.split('mpirun -n 32 vasp')
            gv_exe = shlex.split('mpirun -n 32 gvasp')  # mendel uses mpirun, not aprun
            print 'running on MENDEL'
        else:
            raise ValueError('Unrecognized host!')

        for job in self.jobs:
            job.vasp_cmd = v_exe
            job.gamma_vasp_cmd = gv_exe

        logging.basicConfig(level=logging.DEBUG)
        c = Custodian(self.handlers, self.jobs, self.max_errors)
        custodian_out = c.run()

        all_errors = set()
        for run in custodian_out:
            for correction in run['corrections']:
                all_errors.update(correction['errors'])

        stored_data = {'error_list': list(all_errors)}
        update_spec = {
            'prev_vasp_dir': get_block_part(os.getcwd()),
            'prev_task_type': fw_spec['task_type'],
            'mpsnl': fw_spec['mpsnl'],
            'snlgroup_id': fw_spec['snlgroup_id'],
            'run_tags': fw_spec['run_tags']
        }

        return FWAction(stored_data=stored_data, update_spec=update_spec)
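
The TODO above asks whether the executable could come from an environment
variable instead of hostname sniffing. A minimal sketch of that idea; the
variable names VASP_EXE and GVASP_EXE are hypothetical, not part of the
original task:

    import os
    import shlex

    def get_vasp_cmds(default_v='aprun -n 48 vasp',
                      default_gv='aprun -n 48 gvasp'):
        """Prefer env-var overrides, fall back to the given defaults."""
        # e.g. export VASP_EXE="mpirun -n 32 vasp" in the submit script
        v_exe = shlex.split(os.environ.get('VASP_EXE', default_v))
        gv_exe = shlex.split(os.environ.get('GVASP_EXE', default_gv))
        return v_exe, gv_exe
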
Example #3
    def run_task(self, fw_spec):

        # write a file containing the formula and task_type for somewhat
        # easier file system browsing
        self._write_formula_file(fw_spec)

        # TODO: make this better - is there a way to load an environment
        # variable as the VASP_EXE?
        if 'nid' in socket.gethostname():  # hopper compute nodes
            # TODO: can base ncores on FW_submit.script
            v_exe = shlex.split('aprun -n 48 vasp')
            gv_exe = shlex.split('aprun -n 48 gvasp')
            print 'running on HOPPER'
        elif 'c' in socket.gethostname():  # mendel compute nodes
            # TODO: can base ncores on FW_submit.script
            v_exe = shlex.split('mpirun -n 32 vasp')
            gv_exe = shlex.split('mpirun -n 32 gvasp')  # mendel uses mpirun, not aprun
            print 'running on MENDEL'
        else:
            raise ValueError('Unrecognized host!')

        for job in self.jobs:
            job.vasp_cmd = v_exe
            job.gamma_vasp_cmd = gv_exe

        logging.basicConfig(level=logging.DEBUG)
        c = Custodian(self.handlers, self.jobs, self.max_errors)
        custodian_out = c.run()

        all_errors = set()
        for run in custodian_out:
            for correction in run['corrections']:
                all_errors.update(correction['errors'])

        stored_data = {'error_list': list(all_errors)}
        update_spec = {'prev_vasp_dir': get_block_part(os.getcwd()),
                       'prev_task_type': fw_spec['task_type'],
                       'mpsnl': fw_spec['mpsnl'],
                       'snlgroup_id': fw_spec['snlgroup_id'],
                       'run_tags': fw_spec['run_tags']}

        return FWAction(stored_data=stored_data, update_spec=update_spec)
Example #4
    def run_task(self, fw_spec):

        # get the band structure and nelect from files
        """
        prev_dir = get_loc(fw_spec['prev_vasp_dir'])
        vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
        kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

        vr = Vasprun(vasprun_loc)
        bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
        """

        # get the band structure and nelect from DB
        block_part = get_block_part(fw_spec['prev_vasp_dir'])

        db_dir = os.environ['DB_LOC']
        db_path = os.path.join(db_dir, 'tasks_db.json')
        with open(db_path) as f:
            creds = json.load(f)
            connection = MongoClient(creds['host'], creds['port'])
            tdb = connection[creds['database']]
            tdb.authenticate(creds['admin_user'], creds['admin_password'])

            m_task = tdb.tasks.find_one({"dir_name": block_part}, {"calculations": 1, "task_id": 1})
            nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
            bs_id = m_task['calculations'][0]['band_structure_fs_id']
            print bs_id, type(bs_id)
            fs = gridfs.GridFS(tdb, 'band_structure_fs')
            bs_dict = json.loads(fs.get(bs_id).read())
            bs_dict['structure'] = m_task['calculations'][0]['output']['crystal']
            bs = BandStructure.from_dict(bs_dict)
            print 'Band Structure found:', bool(bs)
            print nelect

            # run Boltztrap
            runner = BoltztrapRunner(bs, nelect)
            dir = runner.run(path_dir=os.getcwd())

            # put the data in the database
            bta = BoltztrapAnalyzer.from_files(dir)
            data = bta.to_dict
            data.update(get_meta_from_structure(bs._structure))
            data['snlgroup_id'] = fw_spec['snlgroup_id']
            data['run_tags'] = fw_spec['run_tags']
            data['snl'] = fw_spec['mpsnl']
            data['dir_name_full'] = dir
            data['dir_name'] = get_block_part(dir)
            data['task_id'] = m_task['task_id']
            data['hall'] = {}  # remove because it is too large and not useful
            data['hall_doping'] = {}  # remove because it is too large and not useful
            tdb.boltztrap.insert(clean_json(data))

        update_spec = {'prev_vasp_dir': fw_spec['prev_vasp_dir'],
                       'boltztrap_dir': os.getcwd(),
                       'prev_task_type': fw_spec['task_type'],
                       'mpsnl': fw_spec['mpsnl'],
                       'snlgroup_id': fw_spec['snlgroup_id'],
                       'run_tags': fw_spec['run_tags'],
                       'parameters': fw_spec.get('parameters')}

        return FWAction(update_spec=update_spec)
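
The block above reads Mongo credentials from a tasks_db.json file under
DB_LOC. A sketch of the shape that file needs, written as a Python dict with
placeholder values (the keys are exactly the ones the code reads; the values
are illustrative):

    EXAMPLE_TASKS_DB_JSON = {
        "host": "localhost",           # creds['host']
        "port": 27017,                 # creds['port']
        "database": "vasp",            # creds['database']
        "admin_user": "admin",         # creds['admin_user']
        "admin_password": "changeme",  # creds['admin_password']
    }
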
Example #5
    def run_task(self, fw_spec):
        # import here to prevent import errors in bigger MPCollab
        # get the band structure and nelect from files
        """
        prev_dir = get_loc(fw_spec['prev_vasp_dir'])
        vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
        kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

        vr = Vasprun(vasprun_loc)
        bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
        """
        filename = get_slug(
            'JOB--' + fw_spec['mpsnl'].structure.composition.reduced_formula + '--'
            + fw_spec['task_type'])
        with open(filename, 'w+') as f:
            f.write('')

        # get the band structure and nelect from DB
        block_part = get_block_part(fw_spec['prev_vasp_dir'])

        db_dir = os.environ['DB_LOC']
        db_path = os.path.join(db_dir, 'tasks_db.json')
        with open(db_path) as f:
            creds = json.load(f)
            connection = MongoClient(creds['host'], creds['port'])
            tdb = connection[creds['database']]
            tdb.authenticate(creds['admin_user'], creds['admin_password'])

            props = {"calculations": 1, "task_id": 1, "state": 1, "pseudo_potential": 1, "run_type": 1, "is_hubbard": 1, "hubbards": 1, "unit_cell_formula": 1}
            m_task = tdb.tasks.find_one({"dir_name": block_part}, props)
            if not m_task:
                time.sleep(60)  # wait in case the parent task's DB insertion is still in flight
                m_task = tdb.tasks.find_one({"dir_name": block_part}, props)

            if not m_task:
                raise ValueError("Could not find task with dir_name: {}".format(block_part))

            if m_task['state'] != 'successful':
                raise ValueError("Cannot run Boltztrap; parent job unsuccessful")

            nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
            bs_id = m_task['calculations'][0]['band_structure_fs_id']
            print bs_id, type(bs_id)
            fs = gridfs.GridFS(tdb, 'band_structure_fs')
            bs_dict = json.loads(fs.get(bs_id).read())
            bs_dict['structure'] = m_task['calculations'][0]['output']['crystal']
            bs = BandStructure.from_dict(bs_dict)
            print 'Band Structure found:', bool(bs)
            print nelect

            # run Boltztrap
            runner = BoltztrapRunner(bs, nelect)
            dir = runner.run(path_dir=os.getcwd())

            # put the data in the database
            bta = BoltztrapAnalyzer.from_files(dir)

            # 8/21/15 - Anubhav removed fs_id (also see line further below, ted['boltztrap_full_fs_id'] ...)
            # 8/21/15 - this is to save space in MongoDB, as well as non-use of full Boltztrap output (vs rerun)
            """
            data = bta.as_dict()
            data.update(get_meta_from_structure(bs._structure))
            data['snlgroup_id'] = fw_spec['snlgroup_id']
            data['run_tags'] = fw_spec['run_tags']
            data['snl'] = fw_spec['mpsnl']
            data['dir_name_full'] = dir
            data['dir_name'] = get_block_part(dir)
            data['task_id'] = m_task['task_id']
            del data['hall']  # remove because it is too large and not useful
            fs = gridfs.GridFS(tdb, "boltztrap_full_fs")
            btid = fs.put(json.dumps(jsanitize(data)))
            """

            # now for the "sanitized" data
            ted = bta.as_dict()
            del ted['seebeck']
            del ted['hall']
            del ted['kappa']
            del ted['cond']

            # ted['boltztrap_full_fs_id'] = btid
            ted['snlgroup_id'] = fw_spec['snlgroup_id']
            ted['run_tags'] = fw_spec['run_tags']
            ted['snl'] = fw_spec['mpsnl'].as_dict()
            ted['dir_name_full'] = dir
            ted['dir_name'] = get_block_part(dir)
            ted['task_id'] = m_task['task_id']

            ted['pf_doping'] = bta.get_power_factor(output='tensor', relaxation_time=self.TAU)
            ted['zt_doping'] = bta.get_zt(output='tensor', relaxation_time=self.TAU, kl=self.KAPPAL)

            ted['pf_eigs'] = self.get_eigs(ted, 'pf_doping')
            ted['pf_best'] = self.get_extreme(ted, 'pf_eigs')
            ted['pf_best_dope18'] = self.get_extreme(ted, 'pf_eigs', max_didx=3)
            ted['pf_best_dope19'] = self.get_extreme(ted, 'pf_eigs', max_didx=4)
            ted['zt_eigs'] = self.get_eigs(ted, 'zt_doping')
            ted['zt_best'] = self.get_extreme(ted, 'zt_eigs')
            ted['zt_best_dope18'] = self.get_extreme(ted, 'zt_eigs', max_didx=3)
            ted['zt_best_dope19'] = self.get_extreme(ted, 'zt_eigs', max_didx=4)
            ted['seebeck_eigs'] = self.get_eigs(ted, 'seebeck_doping')
            ted['seebeck_best'] = self.get_extreme(ted, 'seebeck_eigs')
            ted['seebeck_best_dope18'] = self.get_extreme(ted, 'seebeck_eigs', max_didx=3)
            ted['seebeck_best_dope19'] = self.get_extreme(ted, 'seebeck_eigs', max_didx=4)
            ted['cond_eigs'] = self.get_eigs(ted, 'cond_doping')
            ted['cond_best'] = self.get_extreme(ted, 'cond_eigs')
            ted['cond_best_dope18'] = self.get_extreme(ted, 'cond_eigs', max_didx=3)
            ted['cond_best_dope19'] = self.get_extreme(ted, 'cond_eigs', max_didx=4)
            ted['kappa_eigs'] = self.get_eigs(ted, 'kappa_doping')
            ted['kappa_best'] = self.get_extreme(ted, 'kappa_eigs', maximize=False)
            ted['kappa_best_dope18'] = self.get_extreme(ted, 'kappa_eigs', maximize=False, max_didx=3)
            ted['kappa_best_dope19'] = self.get_extreme(ted, 'kappa_eigs', maximize=False, max_didx=4)

            try:
                from mpcollab.thermoelectrics.boltztrap_TE import BoltzSPB
                bzspb = BoltzSPB(ted)
                maxpf_p = bzspb.get_maximum_power_factor(
                    'p', temperature=0, tau=1E-14, ZT=False, kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                maxpf_n = bzspb.get_maximum_power_factor(
                    'n', temperature=0, tau=1E-14, ZT=False, kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                maxzt_p = bzspb.get_maximum_power_factor(
                    'p', temperature=0, tau=1E-14, ZT=True, kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                maxzt_n = bzspb.get_maximum_power_factor(
                    'n', temperature=0, tau=1E-14, ZT=True, kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                ted['zt_best_finemesh'] = {'p': maxzt_p, 'n': maxzt_n}
                ted['pf_best_finemesh'] = {'p': maxpf_p, 'n': maxpf_n}
            except:
                import traceback
                traceback.print_exc()
                print 'COULD NOT GET FINE MESH DATA'

            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")
            try:
                func = m_task["pseudo_potential"]["functional"]
                labels = m_task["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                parameters = {"run_type": m_task["run_type"],
                          "is_hubbard": m_task["is_hubbard"],
                          "hubbards": m_task["hubbards"],
                          "potcar_symbols": symbols}
                entry = ComputedEntry(Composition(m_task["unit_cell_formula"]),
                                      0.0, 0.0, parameters=parameters,
                                      entry_id=m_task["task_id"])

                ted["is_compatible"] = bool(mpc.process_entry(entry))
            except:
                traceback.print_exc()
                print 'ERROR in getting compatibility, task_id: {}'.format(m_task["task_id"])
                ted["is_compatible"] = None

            tdb.boltztrap.insert(jsanitize(ted))

            update_spec = {'prev_vasp_dir': fw_spec['prev_vasp_dir'],
                           'boltztrap_dir': os.getcwd(),
                           'prev_task_type': fw_spec['task_type'],
                           'mpsnl': fw_spec['mpsnl'].as_dict(),
                           'snlgroup_id': fw_spec['snlgroup_id'],
                           'run_tags': fw_spec['run_tags'],
                           'parameters': fw_spec.get('parameters')}

        return FWAction(update_spec=update_spec)
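
The single sleep-and-retry above covers one slow parent insertion; a bounded
retry loop is the natural generalization. A sketch, with illustrative attempt
count and interval (neither is taken from the original code):

    import time

    def find_task_with_retry(tasks_coll, block_part, props,
                             attempts=5, wait_s=60):
        """Poll for the parent task doc until it appears or we give up."""
        for _ in range(attempts):
            m_task = tasks_coll.find_one({"dir_name": block_part}, props)
            if m_task:
                return m_task
            time.sleep(wait_s)
        raise ValueError(
            "Could not find task with dir_name: {}".format(block_part))
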
Example #6
    def assimilate(self, old_task):
        """
        Parses VASP runs, inserts the result into the db, and returns the
        task_id or doc of the insertion.

        Returns:
            If in simulate_mode, the entire doc is returned for debugging
            purposes. Else, only the task_id of the inserted doc is returned.
        """

        path = old_task['dir_name']  # AJ: get dir name from task
        d = self.get_task_doc(path, self.parse_dos,
                              self.additional_fields)
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        if not self.simulate:
            # Perform actual insertion into db. Because db connections cannot
            # be pickled, every insertion needs to create a new connection
            # to the db.
            conn = MongoClient(self.host, self.port)
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A ref
            # to the dos file is in the dos_fs_id.
            result = coll.find_one({"dir_name": d["dir_name"]})
            if result is None or self.update_duplicates:
                if self.parse_dos and "calculations" in d:
                    for calc in d["calculations"]:
                        if "dos" in calc:
                            dos = json.dumps(calc["dos"])
                            fs = gridfs.GridFS(db, "dos_fs")
                            dosid = fs.put(dos)
                            calc["dos_fs_id"] = dosid
                            del calc["dos"]

                d["last_updated"] = datetime.datetime.today()
                if result is None:
                    d["task_id"] = "mp-{}".format(old_task['task_id'])  # AJ: old task_id is new
                    logger.info("Inserting {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))
                elif self.update_duplicates:
                    d["task_id"] = result["task_id"]
                    logger.info("Updating {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))

                # Fireworks processing
                self.process_fw(old_task, d)
                coll.update({"dir_name": d["dir_name"]}, {"$set": d},
                            upsert=True)
                return d["task_id"], d
            else:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

        else:
            d["task_id"] = 0
            logger.info("Simulated insert into database for {} with task_id {}"
            .format(d["dir_name"], d["task_id"]))
            return 0, d
Example #7
    def run_task(self, fw_spec):
        # import here to prevent import errors in bigger MPCollab
        # get the band structure and nelect from files
        """
        prev_dir = get_loc(fw_spec['prev_vasp_dir'])
        vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
        kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

        vr = Vasprun(vasprun_loc)
        bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
        """
        filename = get_slug(
            'JOB--' + fw_spec['mpsnl'].structure.composition.reduced_formula +
            '--' + fw_spec['task_type'])
        with open(filename, 'w+') as f:
            f.write('')

        # get the band structure and nelect from DB
        block_part = get_block_part(fw_spec['prev_vasp_dir'])

        db_dir = os.environ['DB_LOC']
        db_path = os.path.join(db_dir, 'tasks_db.json')
        with open(db_path) as f:
            creds = json.load(f)
            connection = MongoClient(creds['host'], creds['port'])
            tdb = connection[creds['database']]
            tdb.authenticate(creds['admin_user'], creds['admin_password'])

            props = {
                "calculations": 1,
                "task_id": 1,
                "state": 1,
                "pseudo_potential": 1,
                "run_type": 1,
                "is_hubbard": 1,
                "hubbards": 1,
                "unit_cell_formula": 1
            }
            m_task = tdb.tasks.find_one({"dir_name": block_part}, props)
            if not m_task:
                # wait in case the parent task's DB insertion is still in flight
                time.sleep(60)
                m_task = tdb.tasks.find_one({"dir_name": block_part}, props)

            if not m_task:
                raise ValueError(
                    "Could not find task with dir_name: {}".format(block_part))

            if m_task['state'] != 'successful':
                raise ValueError(
                    "Cannot run Boltztrap; parent job unsuccessful")

            nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
            bs_id = m_task['calculations'][0]['band_structure_fs_id']
            print bs_id, type(bs_id)
            fs = gridfs.GridFS(tdb, 'band_structure_fs')
            bs_dict = json.loads(fs.get(bs_id).read())
            bs_dict['structure'] = m_task['calculations'][0]['output'][
                'crystal']
            bs = BandStructure.from_dict(bs_dict)
            print("find previous run with block_part {}".format(block_part))
            print 'Band Structure found:', bool(bs)
            print(bs.as_dict())
            print("nelect: {}".format(nelect))

            # run Boltztrap
            doping = []
            for d in [1e16, 1e17, 1e18, 1e19, 1e20]:
                doping.extend([1 * d, 2.5 * d, 5 * d, 7.5 * d])
            doping.append(1e21)
            runner = BoltztrapRunner(bs, nelect, doping=doping)
            dir = runner.run(path_dir=os.getcwd())

            # put the data in the database
            bta = BoltztrapAnalyzer.from_files(dir)

            # 8/21/15 - Anubhav removed fs_id (also see line further below, ted['boltztrap_full_fs_id'] ...)
            # 8/21/15 - this is to save space in MongoDB, as well as non-use of full Boltztrap output (vs rerun)
            """
            data = bta.as_dict()
            data.update(get_meta_from_structure(bs._structure))
            data['snlgroup_id'] = fw_spec['snlgroup_id']
            data['run_tags'] = fw_spec['run_tags']
            data['snl'] = fw_spec['mpsnl']
            data['dir_name_full'] = dir
            data['dir_name'] = get_block_part(dir)
            data['task_id'] = m_task['task_id']
            del data['hall']  # remove because it is too large and not useful
            fs = gridfs.GridFS(tdb, "boltztrap_full_fs")
            btid = fs.put(json.dumps(jsanitize(data)))
            """

            # now for the "sanitized" data
            ted = bta.as_dict()
            del ted['seebeck']
            del ted['hall']
            del ted['kappa']
            del ted['cond']

            # ted['boltztrap_full_fs_id'] = btid
            ted['snlgroup_id'] = fw_spec['snlgroup_id']
            ted['run_tags'] = fw_spec['run_tags']
            ted['snl'] = fw_spec['mpsnl'].as_dict()
            ted['dir_name_full'] = dir
            ted['dir_name'] = get_block_part(dir)
            ted['task_id'] = m_task['task_id']

            ted['pf_doping'] = bta.get_power_factor(output='tensor',
                                                    relaxation_time=self.TAU)
            ted['zt_doping'] = bta.get_zt(output='tensor',
                                          relaxation_time=self.TAU,
                                          kl=self.KAPPAL)

            ted['pf_eigs'] = self.get_eigs(ted, 'pf_doping')
            ted['pf_best'] = self.get_extreme(ted, 'pf_eigs')
            ted['pf_best_dope18'] = self.get_extreme(ted,
                                                     'pf_eigs',
                                                     max_didx=3)
            ted['pf_best_dope19'] = self.get_extreme(ted,
                                                     'pf_eigs',
                                                     max_didx=4)
            ted['zt_eigs'] = self.get_eigs(ted, 'zt_doping')
            ted['zt_best'] = self.get_extreme(ted, 'zt_eigs')
            ted['zt_best_dope18'] = self.get_extreme(ted,
                                                     'zt_eigs',
                                                     max_didx=3)
            ted['zt_best_dope19'] = self.get_extreme(ted,
                                                     'zt_eigs',
                                                     max_didx=4)
            ted['seebeck_eigs'] = self.get_eigs(ted, 'seebeck_doping')
            ted['seebeck_best'] = self.get_extreme(ted, 'seebeck_eigs')
            ted['seebeck_best_dope18'] = self.get_extreme(ted,
                                                          'seebeck_eigs',
                                                          max_didx=3)
            ted['seebeck_best_dope19'] = self.get_extreme(ted,
                                                          'seebeck_eigs',
                                                          max_didx=4)
            ted['cond_eigs'] = self.get_eigs(ted, 'cond_doping')
            ted['cond_best'] = self.get_extreme(ted, 'cond_eigs')
            ted['cond_best_dope18'] = self.get_extreme(ted,
                                                       'cond_eigs',
                                                       max_didx=3)
            ted['cond_best_dope19'] = self.get_extreme(ted,
                                                       'cond_eigs',
                                                       max_didx=4)
            ted['kappa_eigs'] = self.get_eigs(ted, 'kappa_doping')
            ted['kappa_best'] = self.get_extreme(ted,
                                                 'kappa_eigs',
                                                 maximize=False)
            ted['kappa_best_dope18'] = self.get_extreme(ted,
                                                        'kappa_eigs',
                                                        maximize=False,
                                                        max_didx=3)
            ted['kappa_best_dope19'] = self.get_extreme(ted,
                                                        'kappa_eigs',
                                                        maximize=False,
                                                        max_didx=4)

            try:
                from mpcollab.thermoelectrics.boltztrap_TE import BoltzSPB
                bzspb = BoltzSPB(ted)
                maxpf_p = bzspb.get_maximum_power_factor(
                    'p',
                    temperature=0,
                    tau=1E-14,
                    ZT=False,
                    kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                maxpf_n = bzspb.get_maximum_power_factor(
                    'n',
                    temperature=0,
                    tau=1E-14,
                    ZT=False,
                    kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                maxzt_p = bzspb.get_maximum_power_factor(
                    'p',
                    temperature=0,
                    tau=1E-14,
                    ZT=True,
                    kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                maxzt_n = bzspb.get_maximum_power_factor(
                    'n',
                    temperature=0,
                    tau=1E-14,
                    ZT=True,
                    kappal=0.5,
                    otherprops=('get_seebeck_mu_eig',
                                'get_conductivity_mu_eig',
                                'get_thermal_conductivity_mu_eig',
                                'get_average_eff_mass_tensor_mu'))

                ted['zt_best_finemesh'] = {'p': maxzt_p, 'n': maxzt_n}
                ted['pf_best_finemesh'] = {'p': maxpf_p, 'n': maxpf_n}
            except:
                import traceback
                traceback.print_exc()
                print 'COULD NOT GET FINE MESH DATA'

            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")
            try:
                func = m_task["pseudo_potential"]["functional"]
                labels = m_task["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                parameters = {
                    "run_type": m_task["run_type"],
                    "is_hubbard": m_task["is_hubbard"],
                    "hubbards": m_task["hubbards"],
                    "potcar_symbols": symbols
                }
                entry = ComputedEntry(Composition(m_task["unit_cell_formula"]),
                                      0.0,
                                      0.0,
                                      parameters=parameters,
                                      entry_id=m_task["task_id"])

                ted["is_compatible"] = bool(mpc.process_entry(entry))
            except:
                traceback.print_exc()
                print 'ERROR in getting compatibility, task_id: {}'.format(
                    m_task["task_id"])
                ted["is_compatible"] = None

            tdb.boltztrap.insert(jsanitize(ted))

            update_spec = {
                'prev_vasp_dir': fw_spec['prev_vasp_dir'],
                'boltztrap_dir': os.getcwd(),
                'prev_task_type': fw_spec['task_type'],
                'mpsnl': fw_spec['mpsnl'].as_dict(),
                'snlgroup_id': fw_spec['snlgroup_id'],
                'run_tags': fw_spec['run_tags'],
                'parameters': fw_spec.get('parameters')
            }

        return FWAction(update_spec=update_spec)
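
For reference, the doping grid built above (1, 2.5, 5 and 7.5 at each decade
from 1e16 to 1e20, plus a final 1e21) can be written as one comprehension;
this sketch produces the same list in the same order:

    doping = [f * d
              for d in (1e16, 1e17, 1e18, 1e19, 1e20)
              for f in (1, 2.5, 5, 7.5)] + [1e21]
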
Example #8
    def assimilate(self, path, launches_coll=None):
        """
        Parses VASP runs, inserts the result into the db, and returns the
        task_id or doc of the insertion.

        Returns:
            If in simulate_mode, the entire doc is returned for debugging
            purposes. Else, only the task_id of the inserted doc is returned.
        """

        d = self.get_task_doc(path)
        if self.additional_fields:
            d.update(self.additional_fields)  # always add additional fields, even for failed jobs

        try:
            d["dir_name_full"] = d["dir_name"].split(":")[1]
            d["dir_name"] = get_block_part(d["dir_name_full"])
            d["stored_data"] = {}
        except:
            print 'COULD NOT GET DIR NAME'
            pprint.pprint(d)
            print traceback.format_exc()
            raise ValueError('IMPROPER PARSING OF {}'.format(path))

        if not self.simulate:
            # Perform actual insertion into db. Because db connections cannot
            # be pickled, every insertion needs to create a new connection
            # to the db.
            conn = MongoClient(self.host, self.port)
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A ref
            # to the dos file is in the dos_fs_id.
            result = coll.find_one({"dir_name": d["dir_name"]})

            if result is None or self.update_duplicates:
                if self.parse_dos and "calculations" in d:
                    for calc in d["calculations"]:
                        if "dos" in calc:
                            dos = json.dumps(calc["dos"], cls=MontyEncoder)
                            fs = gridfs.GridFS(db, "dos_fs")
                            dosid = fs.put(dos)
                            calc["dos_fs_id"] = dosid
                            del calc["dos"]

                d["last_updated"] = datetime.datetime.today()
                if result is None:
                    if ("task_id" not in d) or (not d["task_id"]):
                        d["task_id"] = "mp-{}".format(
                            db.counter.find_one_and_update(
                                {"_id": "taskid"}, {"$inc": {"c": 1}}
			    )["c"])
                    logger.info("Inserting {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))
                elif self.update_duplicates:
                    d["task_id"] = result["task_id"]
                    logger.info("Updating {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))

                # Fireworks processing

                self.process_fw(path, d)

                try:
                    # Add oxide_type
                    struct = Structure.from_dict(d["output"]["crystal"])
                    d["oxide_type"] = oxide_type(struct)
                except:
                    logger.error("can't get oxide_type for {}".format(d["task_id"]))
                    d["oxide_type"] = None

                # Override incorrect outcar subdocs for two step relaxations
                if "optimize structure" in d['task_type'] and \
                        os.path.exists(os.path.join(path, "relax2")):
                    try:
                        run_stats = {}
                        for i in [1, 2]:
                            o_path = os.path.join(path, "relax" + str(i), "OUTCAR")
                            o_path = o_path if os.path.exists(o_path) else o_path + ".gz"
                            outcar = Outcar(o_path)
                            d["calculations"][i - 1]["output"]["outcar"] = outcar.as_dict()
                            run_stats["relax" + str(i)] = outcar.run_stats
                    except:
                        logger.error("Bad OUTCAR for {}.".format(path))

                    try:
                        overall_run_stats = {}
                        for key in ["Total CPU time used (sec)", "User time (sec)",
                                    "System time (sec)", "Elapsed time (sec)"]:
                            overall_run_stats[key] = sum(
                                v[key] for v in run_stats.values())
                        run_stats["overall"] = overall_run_stats
                    except:
                        logger.error("Bad run stats for {}.".format(path))

                    d["run_stats"] = run_stats

                # add is_compatible
                mpc = MaterialsProjectCompatibility("Advanced")

                try:
                    func = d["pseudo_potential"]["functional"]
                    labels = d["pseudo_potential"]["labels"]
                    symbols = ["{} {}".format(func, label) for label in labels]
                    parameters = {"run_type": d["run_type"],
                              "is_hubbard": d["is_hubbard"],
                              "hubbards": d["hubbards"],
                              "potcar_symbols": symbols}
                    entry = ComputedEntry(Composition(d["unit_cell_formula"]),
                                          0.0, 0.0, parameters=parameters,
                                          entry_id=d["task_id"])

                    d['is_compatible'] = bool(mpc.process_entry(entry))
                except:
                    traceback.print_exc()
                    print 'ERROR in getting compatibility'
                    d['is_compatible'] = None


                # task_type dependent processing
                if 'static' in d['task_type']:
                    launch_doc = launches_coll.find_one(
                        {"fw_id": d['fw_id'],
                         "launch_dir": {"$regex": d["dir_name"]}},
                        {"action.stored_data": 1})
                    for i in ["conventional_standard_structure", "symmetry_operations",
                              "symmetry_dataset", "refined_structure"]:
                        try:
                            d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                        except:
                            pass

                # parse band structure if necessary
                if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                    launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                                                        {"action.stored_data": 1})
                    vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")), parse_projected_eigen=False)

                    if 'band structure' in d['task_type']:
                        def string_to_numlist(stringlist):
                            g = re.search(r'([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                                          stringlist)
                            return [float(g.group(i)) for i in range(1, 4)]

                        for i in ["kpath_name", "kpath"]:
                            d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                        kpoints_doc = d['stored_data']['kpath']['kpoints']
                        for i in kpoints_doc:
                            kpoints_doc[i] = string_to_numlist(kpoints_doc[i])
                        bs = vasp_run.get_band_structure(
                            efermi=d['calculations'][0]['output']['outcar']['efermi'],
                            line_mode=True)
                    else:
                        bs = vasp_run.get_band_structure(
                            efermi=d['calculations'][0]['output']['outcar']['efermi'],
                            line_mode=False)
                    bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                    fs = gridfs.GridFS(db, "band_structure_fs")
                    bs_id = fs.put(bs_json)
                    d['calculations'][0]["band_structure_fs_id"] = bs_id

                    # also override band gap in task doc
                    gap = bs.get_band_gap()
                    vbm = bs.get_vbm()
                    cbm = bs.get_cbm()
                    update_doc = {'bandgap': gap['energy'],
                                  'vbm': vbm['energy'],
                                  'cbm': cbm['energy'],
                                  'is_gap_direct': gap['direct']}
                    d['analysis'].update(update_doc)
                    d['calculations'][0]['output'].update(update_doc)

                coll.update_one({"dir_name": d["dir_name"]}, {'$set': d},
                                upsert=True)

                return d["task_id"], d
            else:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

        else:
            d["task_id"] = 0
            logger.info("Simulated insert into database for {} with task_id {}"
            .format(d["dir_name"], d["task_id"]))
            return 0, d
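
Task ids in the example above come from an atomic counter document;
find_one_and_update returns the pre-increment document by default, so ids
start at whatever "c" was seeded with. A sketch of the one-time setup
(init_task_counter is a hypothetical helper; the collection and field names
are taken from the code above):

    def init_task_counter(db, start=1):
        """Seed the taskid counter once per database."""
        if db.counter.find_one({"_id": "taskid"}) is None:
            db.counter.insert_one({"_id": "taskid", "c": start})
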
Example #9
    def assimilate(self, path, launches_coll=None):
        """
        Parses VASP runs, inserts the result into the db, and returns the
        task_id or doc of the insertion.

        Returns:
            If in simulate_mode, the entire doc is returned for debugging
            purposes. Else, only the task_id of the inserted doc is returned.
        """

        d = self.get_task_doc(path)
        if self.additional_fields:
            # always add additional fields, even for failed jobs
            d.update(self.additional_fields)

        try:
            d["dir_name_full"] = d["dir_name"].split(":")[1]
            d["dir_name"] = get_block_part(d["dir_name_full"])
            d["stored_data"] = {}
        except:
            print 'COULD NOT GET DIR NAME'
            pprint.pprint(d)
            print traceback.format_exc()
            raise ValueError('IMPROPER PARSING OF {}'.format(path))

        if not self.simulate:
            # Perform actual insertion into db. Because db connections cannot
            # be pickled, every insertion needs to create a new connection
            # to the db.
            conn = MongoClient(self.host, self.port)
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A ref
            # to the dos file is in the dos_fs_id.
            result = coll.find_one({"dir_name": d["dir_name"]})

            if result is None or self.update_duplicates:
                if self.parse_dos and "calculations" in d:
                    for calc in d["calculations"]:
                        if "dos" in calc:
                            dos = json.dumps(calc["dos"], cls=MontyEncoder)
                            fs = gridfs.GridFS(db, "dos_fs")
                            dosid = fs.put(dos)
                            calc["dos_fs_id"] = dosid
                            del calc["dos"]

                d["last_updated"] = datetime.datetime.today()
                if result is None:
                    if ("task_id" not in d) or (not d["task_id"]):
                        d["task_id"] = "mp-{}".format(
                            db.counter.find_one_and_update({"_id": "taskid"},
                                                           {"$inc": {
                                                               "c": 1
                                                           }})["c"])
                    logger.info("Inserting {} with taskid = {}".format(
                        d["dir_name"], d["task_id"]))
                elif self.update_duplicates:
                    d["task_id"] = result["task_id"]
                    logger.info("Updating {} with taskid = {}".format(
                        d["dir_name"], d["task_id"]))

                # Fireworks processing

                self.process_fw(path, d)

                try:
                    # Add oxide_type
                    struct = Structure.from_dict(d["output"]["crystal"])
                    d["oxide_type"] = oxide_type(struct)
                except:
                    logger.error("can't get oxide_type for {}".format(
                        d["task_id"]))
                    d["oxide_type"] = None

                # Override incorrect outcar subdocs for two step relaxations
                if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                    try:
                        run_stats = {}
                        for i in [1, 2]:
                            o_path = os.path.join(path, "relax" + str(i),
                                                  "OUTCAR")
                            o_path = o_path if os.path.exists(
                                o_path) else o_path + ".gz"
                            outcar = Outcar(o_path)
                            d["calculations"][
                                i - 1]["output"]["outcar"] = outcar.as_dict()
                            run_stats["relax" + str(i)] = outcar.run_stats
                    except:
                        logger.error("Bad OUTCAR for {}.".format(path))

                    try:
                        overall_run_stats = {}
                        for key in [
                                "Total CPU time used (sec)", "User time (sec)",
                                "System time (sec)", "Elapsed time (sec)"
                        ]:
                            overall_run_stats[key] = sum(
                                [v[key] for v in run_stats.values()])
                        run_stats["overall"] = overall_run_stats
                    except:
                        logger.error("Bad run stats for {}.".format(path))

                    d["run_stats"] = run_stats

                # add is_compatible
                mpc = MaterialsProjectCompatibility("Advanced")

                try:
                    func = d["pseudo_potential"]["functional"]
                    labels = d["pseudo_potential"]["labels"]
                    symbols = ["{} {}".format(func, label) for label in labels]
                    parameters = {
                        "run_type": d["run_type"],
                        "is_hubbard": d["is_hubbard"],
                        "hubbards": d["hubbards"],
                        "potcar_symbols": symbols
                    }
                    entry = ComputedEntry(Composition(d["unit_cell_formula"]),
                                          0.0,
                                          0.0,
                                          parameters=parameters,
                                          entry_id=d["task_id"])

                    d['is_compatible'] = bool(mpc.process_entry(entry))
                except:
                    traceback.print_exc()
                    print 'ERROR in getting compatibility'
                    d['is_compatible'] = None

                # task_type dependent processing
                if 'static' in d['task_type']:
                    launch_doc = launches_coll.find_one(
                        {
                            "fw_id": d['fw_id'],
                            "launch_dir": {
                                "$regex": d["dir_name"]
                            }
                        }, {"action.stored_data": 1})
                    for i in [
                            "conventional_standard_structure",
                            "symmetry_operations", "symmetry_dataset",
                            "refined_structure"
                    ]:
                        try:
                            d['stored_data'][i] = launch_doc['action'][
                                'stored_data'][i]
                        except:
                            pass

                # parse band structure if necessary
                if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                    launch_doc = launches_coll.find_one(
                        {
                            "fw_id": d['fw_id'],
                            "launch_dir": {
                                "$regex": d["dir_name"]
                            }
                        }, {"action.stored_data": 1})
                    vasp_run = Vasprun(zpath(os.path.join(path,
                                                          "vasprun.xml")),
                                       parse_projected_eigen=True)

                    if 'band structure' in d['task_type']:

                        def string_to_numlist(stringlist):
                            g = re.search(
                                r'([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                                stringlist)
                            return [float(g.group(i)) for i in range(1, 4)]

                        for i in ["kpath_name", "kpath"]:
                            d['stored_data'][i] = launch_doc['action'][
                                'stored_data'][i]
                        kpoints_doc = d['stored_data']['kpath']['kpoints']
                        for i in kpoints_doc:
                            if isinstance(kpoints_doc[i], six.string_types):
                                kpoints_doc[i] = string_to_numlist(
                                    kpoints_doc[i])
                        bs = vasp_run.get_band_structure(
                            efermi=d['calculations'][0]['output']['outcar']
                            ['efermi'],
                            line_mode=True)
                    else:
                        bs = vasp_run.get_band_structure(
                            efermi=d['calculations'][0]['output']['outcar']
                            ['efermi'],
                            line_mode=False)
                    bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                    fs = gridfs.GridFS(db, "band_structure_fs")
                    bs_id = fs.put(bs_json)
                    d['calculations'][0]["band_structure_fs_id"] = bs_id

                    # also override band gap in task doc
                    gap = bs.get_band_gap()
                    vbm = bs.get_vbm()
                    cbm = bs.get_cbm()
                    update_doc = {
                        'bandgap': gap['energy'],
                        'vbm': vbm['energy'],
                        'cbm': cbm['energy'],
                        'is_gap_direct': gap['direct']
                    }
                    d['analysis'].update(update_doc)
                    d['calculations'][0]['output'].update(update_doc)

                coll.update_one({"dir_name": d["dir_name"]}, {'$set': d},
                                upsert=True)

                return d["task_id"], d
            else:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

        else:
            d["task_id"] = 0
            logger.info(
                "Simulated insert into database for {} with task_id {}".format(
                    d["dir_name"], d["task_id"]))
            return 0, d
Example #10
    def run_task(self, fw_spec):

        # get the band structure and nelect from files
        """
        prev_dir = get_loc(fw_spec['prev_vasp_dir'])
        vasprun_loc = zpath(os.path.join(prev_dir, 'vasprun.xml'))
        kpoints_loc = zpath(os.path.join(prev_dir, 'KPOINTS'))

        vr = Vasprun(vasprun_loc)
        bs = vr.get_band_structure(kpoints_filename=kpoints_loc)
        """

        # get the band structure and nelect from DB
        block_part = get_block_part(fw_spec['prev_vasp_dir'])

        db_dir = os.environ['DB_LOC']
        db_path = os.path.join(db_dir, 'tasks_db.json')
        with open(db_path) as f:
            creds = json.load(f)
            connection = MongoClient(creds['host'], creds['port'])
            tdb = connection[creds['database']]
            tdb.authenticate(creds['admin_user'], creds['admin_password'])

            m_task = tdb.tasks.find_one({"dir_name": block_part}, {
                "calculations": 1,
                "task_id": 1
            })
            nelect = m_task['calculations'][0]['input']['parameters']['NELECT']
            bs_id = m_task['calculations'][0]['band_structure_fs_id']
            print bs_id, type(bs_id)
            fs = gridfs.GridFS(tdb, 'band_structure_fs')
            bs_dict = json.loads(fs.get(bs_id).read())
            bs_dict['structure'] = m_task['calculations'][0]['output'][
                'crystal']
            bs = BandStructure.from_dict(bs_dict)
            print 'Band Structure found:', bool(bs)
            print nelect

            # run Boltztrap
            runner = BoltztrapRunner(bs, nelect)
            dir = runner.run(path_dir=os.getcwd())

            # put the data in the database
            bta = BoltztrapAnalyzer.from_files(dir)
            data = bta.to_dict
            data.update(get_meta_from_structure(bs._structure))
            data['snlgroup_id'] = fw_spec['snlgroup_id']
            data['run_tags'] = fw_spec['run_tags']
            data['snl'] = fw_spec['mpsnl']
            data['dir_name_full'] = dir
            data['dir_name'] = get_block_part(dir)
            data['task_id'] = m_task['task_id']
            data['hall'] = {}  # remove because it is too large and not useful
            data['hall_doping'] = {}  # remove because it is too large and not useful
            tdb.boltztrap.insert(clean_json(data))

        update_spec = {
            'prev_vasp_dir': fw_spec['prev_vasp_dir'],
            'boltztrap_dir': os.getcwd(),
            'prev_task_type': fw_spec['task_type'],
            'mpsnl': fw_spec['mpsnl'],
            'snlgroup_id': fw_spec['snlgroup_id'],
            'run_tags': fw_spec['run_tags'],
            'parameters': fw_spec.get('parameters')
        }

        return FWAction(update_spec=update_spec)
Example #11
    def assimilate(self, path, launches_coll=None):
        """
        Parses VASP runs, inserts the result into the db, and returns the
        task_id or doc of the insertion.

        Returns:
            If in simulate_mode, the entire doc is returned for debugging
            purposes. Else, only the task_id of the inserted doc is returned.
        """

        d = self.get_task_doc(path, self.parse_dos,
                              self.additional_fields)

        try:
            d["dir_name_full"] = d["dir_name"].split(":")[1]
            d["dir_name"] = get_block_part(d["dir_name_full"])
            d["stored_data"] = {}
        except:
            print 'COULD NOT GET DIR NAME'
            pprint.pprint(d)
            print traceback.format_exc()
            raise ValueError('IMPROPER PARSING OF {}'.format(path))

        if not self.simulate:
            # Perform actual insertion into db. Because db connections cannot
            # be pickled, every insertion needs to create a new connection
            # to the db.
            conn = MongoClient(self.host, self.port)
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A ref
            # to the dos file is in the dos_fs_id.
            result = coll.find_one({"dir_name": d["dir_name"]})

            if result is None or self.update_duplicates:
                if self.parse_dos and "calculations" in d:
                    for calc in d["calculations"]:
                        if "dos" in calc:
                            dos = json.dumps(calc["dos"])
                            fs = gridfs.GridFS(db, "dos_fs")
                            dosid = fs.put(dos)
                            calc["dos_fs_id"] = dosid
                            del calc["dos"]

                d["last_updated"] = datetime.datetime.today()
                if result is None:
                    if ("task_id" not in d) or (not d["task_id"]):
                        d["task_id"] = "mp-{}".format(
                            db.counter.find_and_modify(
                                query={"_id": "taskid"},
                                update={"$inc": {"c": 1}})["c"])
                    logger.info("Inserting {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))
                elif self.update_duplicates:
                    d["task_id"] = result["task_id"]
                    logger.info("Updating {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))

                # Fireworks processing

                self.process_fw(path, d)

                # Override incorrect OUTCAR subdocs for two-step relaxations
                if "optimize structure" in d['task_type'] and \
                        os.path.exists(os.path.join(path, "relax2")):
                    try:
                        run_stats = {}
                        for i in [1, 2]:
                            outcar = Outcar(os.path.join(path, "relax" + str(i), "OUTCAR"))
                            d["calculations"][i - 1]["output"]["outcar"] = outcar.to_dict
                            run_stats["relax" + str(i)] = outcar.run_stats
                    except Exception:
                        logger.error("Bad OUTCAR for {}.".format(path))

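                    # sum the per-relaxation timing counters into an
                    # "overall" entry alongside relax1/relax2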
                    try:
                        overall_run_stats = {}
                        for key in ["Total CPU time used (sec)", "User time (sec)",
                                    "System time (sec)", "Elapsed time (sec)"]:
                            overall_run_stats[key] = sum(v[key] for v in run_stats.values())
                        run_stats["overall"] = overall_run_stats
                    except Exception:
                        logger.error("Bad run stats for {}.".format(path))

                    d["run_stats"] = run_stats

                # task_type-dependent processing
                if 'static' in d['task_type']:
                    launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}}, {"action.stored_data": 1})
                    for i in ["conventional_standard_structure", "symmetry_operations",
                              "symmetry_dataset", "refined_structure"]:
                        try:
                            d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                        except Exception:
                            pass

                # parse band structure if necessary
                if ('band structure' in d['task_type'] or "Uniform" in d['task_type']) \
                        and d['state'] == 'successful':
                    launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                                                        {"action.stored_data": 1})
                    vasp_run = Vasprun(os.path.join(path, "vasprun.xml"), parse_projected_eigen=False)

                    if 'band structure' in d['task_type']:
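                        # kpath kpoints come back as "x y z" strings, e.g.
                        # "0.0 0.5 0.5"; convert each one to a float list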
                        def string_to_numlist(stringlist):
                            g = re.search(r'([0-9\-\.eE]+)\s+([0-9\-\.eE]+)'
                                          r'\s+([0-9\-\.eE]+)', stringlist)
                            return [float(g.group(i)) for i in range(1, 4)]

                        for i in ["kpath_name", "kpath"]:
                            d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                        kpoints_doc = d['stored_data']['kpath']['kpoints']
                        for i in kpoints_doc:
                            kpoints_doc[i] = string_to_numlist(kpoints_doc[i])
                        bs = vasp_run.get_band_structure(
                            efermi=d['calculations'][0]['output']['outcar']['efermi'],
                            line_mode=True)
                    else:
                        bs = vasp_run.get_band_structure(
                            efermi=d['calculations'][0]['output']['outcar']['efermi'],
                            line_mode=False)
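                    # like the DOS above, a serialized band structure can
                    # exceed the Mongo doc size limit, so park it in GridFS
                    # and keep only the file id on the first calculation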
                    bs_json = json.dumps(bs.to_dict)
                    fs = gridfs.GridFS(db, "band_structure_fs")
                    bs_id = fs.put(bs_json)
                    d['calculations'][0]["band_structure_fs_id"] = bs_id

                coll.update({"dir_name": d["dir_name"]}, d, upsert=True)

                return d["task_id"], d
            else:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

        else:
            d["task_id"] = 0
            logger.info("Simulated insert into database for {} with task_id {}"
            .format(d["dir_name"], d["task_id"]))
            return 0, d
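
A minimal sketch of driving assimilate() by hand; every literal below is a placeholder, the import paths are assumptions about the mpworks/fireworks packages, and the constructor arguments mirror the ones used in Example #12, which builds the same call from a tasks_db.json file inside a workflow task:

    # assumed import locations
    from mpworks.drones.mp_vaspdrone import MPVaspDrone
    from fireworks.core.launchpad import LaunchPad

    # hypothetical credentials and run directory
    drone = MPVaspDrone(host='localhost', port=27017, database='vasp',
                        user='admin', password='secret', collection='tasks',
                        parse_dos=False, additional_fields={},
                        update_duplicates=False)
    t_id, doc = drone.assimilate('/projects/garden/block_2013/launcher_01',
                                 launches_coll=LaunchPad.auto_load().launches)
    print 'inserted task', t_id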
Example #12
    def run_task(self, fw_spec):
        if '_fizzled_parents' in fw_spec and 'prev_vasp_dir' not in fw_spec:
            prev_dir = get_loc(fw_spec['_fizzled_parents'][0]['launches'][0]['launch_dir'])
            update_spec = {}
            fizzled_parent = True
            parse_dos = False
        else:
            prev_dir = get_loc(fw_spec['prev_vasp_dir'])
            update_spec = {'prev_vasp_dir': get_block_part(prev_dir),
                           'prev_task_type': fw_spec['prev_task_type'],
                           'run_tags': fw_spec['run_tags']}
            self.additional_fields['run_tags'] = fw_spec['run_tags']
            fizzled_parent = False
            parse_dos = 'Uniform' in fw_spec['prev_task_type']

        if MOVE_TO_GARDEN_DEV:
            prev_dir = move_to_garden(prev_dir, prod=False)

        elif MOVE_TO_GARDEN_PROD:
            prev_dir = move_to_garden(prev_dir, prod=True)

        # get the directory containing the db file
        db_dir = os.environ['DB_LOC']
        db_path = os.path.join(db_dir, 'tasks_db.json')

        logging.basicConfig(level=logging.INFO)
        logger = logging.getLogger('MPVaspDrone')
        logger.setLevel(logging.INFO)
        sh = logging.StreamHandler(stream=sys.stdout)
        sh.setLevel(logging.INFO)
        logger.addHandler(sh)

        with open(db_path) as f:
            db_creds = json.load(f)
            drone = MPVaspDrone(
                host=db_creds['host'], port=db_creds['port'],
                database=db_creds['database'], user=db_creds['admin_user'],
                password=db_creds['admin_password'],
                collection=db_creds['collection'], parse_dos=parse_dos,
                additional_fields=self.additional_fields,
                update_duplicates=self.update_duplicates)
            t_id, d = drone.assimilate(prev_dir, launches_coll=LaunchPad.auto_load().launches)

        mpsnl = d['snl_final'] if 'snl_final' in d else d['snl']
        snlgroup_id = d['snlgroup_id_final'] if 'snlgroup_id_final' in d else d['snlgroup_id']
        update_spec.update({'mpsnl': mpsnl, 'snlgroup_id': snlgroup_id})

        print 'ENTERED task id:', t_id
        stored_data = {'task_id': t_id}
        if d['state'] == 'successful':
            update_spec['analysis'] = d['analysis']
            update_spec['output'] = d['output']
            return FWAction(stored_data=stored_data, update_spec=update_spec)

        # not successful - first test to see if UnconvergedHandler is needed
        if not fizzled_parent:
            unconverged_tag = 'unconverged_handler--{}'.format(fw_spec['prev_task_type'])
            output_dir = last_relax(os.path.join(prev_dir, 'vasprun.xml'))
            ueh = UnconvergedErrorHandler(output_filename=output_dir)
            if ueh.check() and unconverged_tag not in fw_spec['run_tags']:
                print 'Unconverged run! Creating dynamic FW...'

                spec = {'prev_vasp_dir': get_block_part(prev_dir),
                        'prev_task_type': fw_spec['task_type'],
                        'mpsnl': mpsnl, 'snlgroup_id': snlgroup_id,
                        'task_type': fw_spec['prev_task_type'],
                        'run_tags': list(fw_spec['run_tags']),
                        '_dupefinder': DupeFinderVasp().to_dict(),
                        '_priority': fw_spec['_priority']}

                snl = StructureNL.from_dict(spec['mpsnl'])
                spec['run_tags'].append(unconverged_tag)
                spec['_queueadapter'] = QA_VASP

                fws = []
                connections = {}

                f = Composition.from_formula(
                    snl.structure.composition.reduced_formula).alphabetical_formula

                fws.append(FireWork(
                    [VaspCopyTask({'files': ['INCAR', 'KPOINTS', 'POSCAR', 'POTCAR', 'CONTCAR'],
                                   'use_CONTCAR': False}), SetupUnconvergedHandlerTask(),
                     get_custodian_task(spec)], spec, name=get_slug(f + '--' + spec['task_type']),
                    fw_id=-2))

                spec = {'task_type': 'VASP db insertion', '_allow_fizzled_parents': True,
                        '_priority': fw_spec['_priority'], '_queueadapter': QA_DB,
                        'run_tags': list(fw_spec['run_tags'])}
                spec['run_tags'].append(unconverged_tag)
                fws.append(
                    FireWork([VaspToDBTask()], spec, name=get_slug(f + '--' + spec['task_type']),
                             fw_id=-1))
                connections[-2] = -1

                wf = Workflow(fws, connections)

                return FWAction(detours=wf)

        # not successful and not due to convergence problem - FIZZLE
        raise ValueError("DB insertion successful, but don't know how to fix this FireWork! Can't continue with workflow...")
Example #13
def detect():
    for d in glob.glob(os.path.join(SCRATCH_PATH, 'block*/launch*')):
        block_part = get_block_part(d)
        garden_dir = os.path.join(GARDEN_PATH, block_part)
        if os.path.exists(garden_dir):
            print garden_dir
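
detect() assumes two module-level constants defined elsewhere in the package; a sketch of their shape, with hypothetical paths:

    # hypothetical values; the real paths are site-specific configuration
    SCRATCH_PATH = '/global/scratch/vasp_runs'   # where block*/launch* dirs appear
    GARDEN_PATH = '/projects/garden'             # permanent archive ("garden")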
Example #14
    def run_task(self, fw_spec):
        if '_fizzled_parents' in fw_spec and 'prev_vasp_dir' not in fw_spec:
            prev_dir = get_loc(
                fw_spec['_fizzled_parents'][0]['launches'][0]['launch_dir'])
            update_spec = {}
            fizzled_parent = True
            parse_dos = False
        else:
            prev_dir = get_loc(fw_spec['prev_vasp_dir'])
            update_spec = {
                'prev_vasp_dir': get_block_part(prev_dir),
                'prev_task_type': fw_spec['prev_task_type'],
                'run_tags': fw_spec['run_tags']
            }
            self.additional_fields['run_tags'] = fw_spec['run_tags']
            fizzled_parent = False
            parse_dos = 'Uniform' in fw_spec['prev_task_type']

        if MOVE_TO_GARDEN_DEV:
            prev_dir = move_to_garden(prev_dir, prod=False)

        elif MOVE_TO_GARDEN_PROD:
            prev_dir = move_to_garden(prev_dir, prod=True)

        # get the directory containing the db file
        db_dir = os.environ['DB_LOC']
        db_path = os.path.join(db_dir, 'tasks_db.json')

        logging.basicConfig(level=logging.INFO)
        logger = logging.getLogger('MPVaspDrone')
        logger.setLevel(logging.INFO)
        sh = logging.StreamHandler(stream=sys.stdout)
        sh.setLevel(logging.INFO)
        logger.addHandler(sh)

        with open(db_path) as f:
            db_creds = json.load(f)
            drone = MPVaspDrone(host=db_creds['host'],
                                port=db_creds['port'],
                                database=db_creds['database'],
                                user=db_creds['admin_user'],
                                password=db_creds['admin_password'],
                                collection=db_creds['collection'],
                                parse_dos=parse_dos,
                                additional_fields=self.additional_fields,
                                update_duplicates=self.update_duplicates)
            t_id, d = drone.assimilate(
                prev_dir, launches_coll=LaunchPad.auto_load().launches)

        mpsnl = d['snl_final'] if 'snl_final' in d else d['snl']
        snlgroup_id = (d['snlgroup_id_final']
                       if 'snlgroup_id_final' in d else d['snlgroup_id'])
        update_spec.update({'mpsnl': mpsnl, 'snlgroup_id': snlgroup_id})

        print 'ENTERED task id:', t_id
        stored_data = {'task_id': t_id}
        if d['state'] == 'successful':
            update_spec['analysis'] = d['analysis']
            update_spec['output'] = d['output']
            return FWAction(stored_data=stored_data, update_spec=update_spec)

        # not successful - first test to see if UnconvergedHandler is needed
        if not fizzled_parent:
            unconverged_tag = 'unconverged_handler--{}'.format(
                fw_spec['prev_task_type'])
            output_dir = last_relax(os.path.join(prev_dir, 'vasprun.xml'))
            ueh = UnconvergedErrorHandler(output_filename=output_dir)
            if ueh.check() and unconverged_tag not in fw_spec['run_tags']:
                print 'Unconverged run! Creating dynamic FW...'

                spec = {
                    'prev_vasp_dir': get_block_part(prev_dir),
                    'prev_task_type': fw_spec['task_type'],
                    'mpsnl': mpsnl,
                    'snlgroup_id': snlgroup_id,
                    'task_type': fw_spec['prev_task_type'],
                    'run_tags': list(fw_spec['run_tags']),
                    '_dupefinder': DupeFinderVasp().to_dict(),
                    '_priority': fw_spec['_priority']
                }

                snl = StructureNL.from_dict(spec['mpsnl'])
                spec['run_tags'].append(unconverged_tag)
                spec['_queueadapter'] = QA_VASP

                fws = []
                connections = {}

                f = Composition.from_formula(
                    snl.structure.composition.reduced_formula
                ).alphabetical_formula

                fws.append(
                    FireWork([
                        VaspCopyTask({
                            'files': ['INCAR', 'KPOINTS', 'POSCAR', 'POTCAR',
                                      'CONTCAR'],
                            'use_CONTCAR': False
                        }),
                        SetupUnconvergedHandlerTask(),
                        get_custodian_task(spec)
                    ],
                             spec,
                             name=get_slug(f + '--' + spec['task_type']),
                             fw_id=-2))

                spec = {
                    'task_type': 'VASP db insertion',
                    '_allow_fizzled_parents': True,
                    '_priority': fw_spec['_priority'],
                    '_queueadapter': QA_DB,
                    'run_tags': list(fw_spec['run_tags'])
                }
                spec['run_tags'].append(unconverged_tag)
                fws.append(
                    FireWork([VaspToDBTask()],
                             spec,
                             name=get_slug(f + '--' + spec['task_type']),
                             fw_id=-1))
                connections[-2] = -1

                wf = Workflow(fws, connections)

                return FWAction(detours=wf)

        # not successful and not due to convergence problem - FIZZLE
        raise ValueError(
            "DB insertion successful, but don't know how to fix this FireWork! Can't continue with workflow..."
        )