Пример #1
0
    def run_task(self, fw_spec):
        """Re-register existing simulations as new documents of this workflow.

        For every id in ``self["db_ids_lst"]`` the matching document is read
        from the ``simulations`` collection, its first nanocluster is made to
        reference the original document, ``source_id`` and ``workflow_id``
        are set, and the document is handed to
        ``update_simulations_collection``. The ``_id`` of each returned
        document is collected in ``fw_spec["temp"]["calc_ids"]``.

        Args:
            fw_spec (dict): Firework spec. ``extdb_connect`` is read, and
                ``workflow`` is read if present. The spec is modified in
                place and reused as the update spec.

        Returns:
            FWAction: action whose ``update_spec`` is the modified spec with
            the ``_category`` and ``name`` keys removed.

        Raises:
            ValueError: if an id in ``db_ids_lst`` has no matching document.
        """
        db_ids_lst = self["db_ids_lst"]
        ext_db = self.get("ext_db", None)
        # identity check: avoids relying on the database object's __eq__
        if ext_db is None:
            ext_db = get_external_database(fw_spec["extdb_connect"])
        workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
        logging.debug(fw_spec)
        # NOTE: same object as fw_spec, so the incoming spec is mutated
        update_spec = fw_spec

        calc_ids = []
        for db_id in db_ids_lst:
            simulation = ext_db["simulations"].find_one({"_id": int(db_id)})
            if simulation is None:
                # fail with a readable message instead of a TypeError on None
                raise ValueError(
                    "no simulation with _id {} found in external database"
                    .format(int(db_id)))
            simulation["nanoclusters"][0]["reference_id"] = int(db_id)
            simulation["source_id"] = db_id
            simulation["workflow_id"] = workflow_id

            dct = update_simulations_collection(
                extdb_connect=fw_spec["extdb_connect"], **simulation)
            calc_ids.append(dct["_id"])

        # update temp workflow data
        update_spec.setdefault("temp", {})["calc_ids"] = calc_ids
        # tolerate specs that do not carry these bookkeeping keys
        update_spec.pop("_category", None)
        update_spec.pop("name", None)
        return FWAction(update_spec=update_spec)
Пример #2
0
    def run_task(self, fw_spec):
        """Insert one simulation document per structure in ``ase_atoms_lst``.

        Each entry of ``self["ase_atoms_lst"]`` is converted with
        ``atoms_dict_to_ase``; the total energy is looked up in the resulting
        ``atoms.info`` under several possible keys. A new id is requested
        from ``_query_id_counter_and_increment`` and a document describing a
        single nanocluster made of all atoms is inserted into the
        ``simulations`` collection. The new ids are stored in
        ``fw_spec["temp"]["calc_ids"]``.

        Args:
            fw_spec (dict): Firework spec. ``extdb_connect`` is read, and
                ``workflow`` is read if present. The spec is modified in
                place and reused as the update spec.

        Returns:
            FWAction: action whose ``update_spec`` is the modified spec with
            the ``_category`` and ``name`` keys removed.
        """
        ase_atoms_lst = self["ase_atoms_lst"]
        workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
        logging.debug(fw_spec)
        # NOTE: same object as fw_spec, so the incoming spec is mutated
        update_spec = fw_spec

        # keys under which the energy may be stored, in order of precedence
        energy_keys = ("E", "energy", "total_energy", "TotalEnergy",
                       "totalenergy")

        # open the database once instead of once per structure
        db = get_external_database(fw_spec["extdb_connect"])
        simulations = db['simulations']

        calc_ids = []
        for atoms_dict in ase_atoms_lst:
            atoms = atoms_dict_to_ase(atoms_dict)
            info = atoms.info
            # first key that is present wins; "UNKNOWN" marks a missing value
            total_energy = next(
                (info[key] for key in energy_keys if key in info), "UNKNOWN")
            if total_energy == "UNKNOWN":
                logging.warning(
                    "total energy of cluster not specified in structure file")

            # request id counter
            simulation_id = _query_id_counter_and_increment('simulations', db)

            # the whole structure is one nanocluster that references itself
            nanocluster = {
                "atom_ids": list(range(len(atoms))),
                "reference_id": simulation_id,
            }

            document = {
                "_id": simulation_id,
                "atoms": atoms_dict,
                "source_id": -1,
                "workflow_id": workflow_id,
                "nanoclusters": [nanocluster],
                "adsorbates": [],
                "substrates": [],
                "operations": [],
                "inp": {},
                "output": {
                    "total_energy": total_energy
                },
            }
            # update external database
            simulations.insert_one(document)
            calc_ids.append(simulation_id)

        # update temp workflow data
        update_spec.setdefault("temp", {})["calc_ids"] = calc_ids
        # tolerate specs that do not carry these bookkeeping keys
        update_spec.pop("_category", None)
        update_spec.pop("name", None)
        return FWAction(update_spec=update_spec)
Пример #3
0
    def run_task(self, fw_spec):
        """Generate nanoclusters and upload them as simulation documents.

        The task parameters are forwarded to ``self.generate``, which returns
        the documents to store together with their ids. All documents are
        inserted into the ``simulations`` collection in a single call and the
        ids are written to ``fw_spec["calc_ids"]``.

        Args:
            fw_spec (dict): Firework spec. ``extdb_connect`` is read, and
                ``workflow`` is read if present. The spec is modified in
                place and reused as the update spec.

        Returns:
            FWAction: action whose ``update_spec`` is the modified spec with
            the ``_category`` and ``name`` keys removed.
        """
        workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
        n_initial_configurations = self["n_initial_configurations"]
        n_configurations = self["n_configurations"]
        shape = self["shape"]
        nanocluster_size = self["nanocluster_size"]
        compositions = self["compositions"]
        elements = self["elements"]
        # no trailing comma here: it would wrap the flag in a 1-tuple, which
        # is truthy even when the flag itself is False
        generate_pure_nanoclusters = self["generate_pure_nanoclusters"]
        bondlength_dct = self["bondlength_dct"]

        db = get_external_database(fw_spec["extdb_connect"])
        simulations = db['simulations']

        # generate clusters
        nanoclusters, calc_ids = self.generate(
            n_initial_configurations,
            n_configurations,
            shape,
            nanocluster_size,
            compositions,
            elements,
            generate_pure_nanoclusters=generate_pure_nanoclusters,
            bondlength_dct=bondlength_dct,
            db=db,
            workflow_id=workflow_id)

        # upload all simulations at once
        simulations.insert_many(nanoclusters)

        # fireworks
        # NOTE: same object as fw_spec, so the incoming spec is mutated
        update_spec = fw_spec
        # NOTE(review): ids are stored at the top level of the spec, not
        # under "temp" -- confirm this is what downstream tasks expect
        update_spec["calc_ids"] = calc_ids

        # tolerate specs that do not carry these bookkeeping keys
        update_spec.pop("_category", None)
        update_spec.pop("name", None)
        return FWAction(update_spec=update_spec)
Пример #4
0
    def run_task(self, fw_spec):
        """Place an adsorbate on every requested site of every nanocluster.

        A reference simulation holding ``reference_energy`` for the adsorbate
        is created first. Then, for each id in ``fw_spec["temp"]["calc_ids"]``,
        the stored structure is loaded into a ``cluskit.Cluster``, and for
        each requested site type the site positions and site descriptors are
        computed. One new simulation document (cluster plus adsorbate) is
        built per site and the documents of one cluster are inserted together.

        On return ``fw_spec["temp"]["calc_ids"]`` holds the ids of the new
        documents and ``fw_spec["temp"]["descmatrix"]`` holds the value
        returned by ``write_descmatrix`` for the stacked site descriptors.

        Args:
            fw_spec (dict): Firework spec. ``extdb_connect`` and
                ``temp.calc_ids`` are read, and ``workflow`` is read if
                present. The spec is modified in place and reused as the
                update spec.

        Returns:
            FWAction: action whose ``update_spec`` is the modified spec with
            the ``_category`` and ``name`` keys removed.

        Raises:
            ValueError: if ``adsite_types`` contains a value other than
                "top", "bridge" or "hollow". This is checked before anything
                is written to the database.
        """
        # integer site codes handed to the cluskit cluster for each site type
        adsite_type_codes = {"top": 1, "bridge": 2, "hollow": 3}

        descriptor = self["descriptor"]
        descriptor_params = self["descriptor_params"]
        adsorbate_name = self["adsorbate_name"]
        adsite_types = self["adsite_types"]
        reference_energy = self["reference_energy"]

        # validate up front so a bad site type cannot leave the database
        # half populated; raise instead of exit() so the caller sees an error
        unknown_types = [
            site_type for site_type in adsite_types
            if site_type not in adsite_type_codes
        ]
        if unknown_types:
            logging.error("adsorption site type unknown, known types are: top, bridge, hollow")
            raise ValueError(
                "adsorption site type unknown: {!r}, known types are: "
                "top, bridge, hollow".format(unknown_types))

        calc_ids = fw_spec["temp"]["calc_ids"]
        simulations = fetch_simulations(fw_spec["extdb_connect"], calc_ids)
        workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
        # NOTE: same object as fw_spec, so the incoming spec is mutated
        update_spec = fw_spec

        logging.debug(fw_spec)
        desc_lst = []
        new_calc_ids = []
        db = get_external_database(fw_spec["extdb_connect"])

        # create reference of adsorbate in order to store its total energy
        # for later constructing adsorption energies
        reference_simulation = update_simulations_collection(
            extdb_connect=fw_spec["extdb_connect"], atoms={},
            source_id=-1, workflow_id=workflow_id,
            nanoclusters=[], adsorbates=[], substrates=[],
            operations=[""], inp={"adsorbate_name": adsorbate_name},
            output={"total_energy": reference_energy},)
        reference_id = reference_simulation["_id"]

        all_atomtypes = gather_all_atom_types(calc_ids, simulations)

        # looping over nc atoms
        for calc_id in calc_ids:
            simulations_chunk_list = []

            # get source simulation
            source_simulation = copy.deepcopy(simulations[str(calc_id)])
            atoms_dict = source_simulation["atoms"]
            atoms = atoms_dict_to_ase(atoms_dict)
            logging.debug(atoms)

            # running cluskit on cluster
            cluster = cluskit.Cluster(atoms)
            cluster.get_surface_atoms()
            cluster.descriptor_setup = _setup_descriptor(
                all_atomtypes, descriptor, **descriptor_params)

            # looping over adsorption site type
            for adsite_type in adsite_types:
                adsite_type_int = adsite_type_codes[adsite_type]

                # get adsorption sites for a nanocluster
                adspos = cluster.get_sites(adsite_type_int)
                sites_surface_atoms = cluster.site_surface_atom_ids[adsite_type_int]

                # get descriptor, one row per site
                desc = cluster.get_sites_descriptor(adsite_type_int)
                desc_lst.extend(desc[i] for i in range(desc.shape[0]))

                adsorbate_lst = adsorbate_pos_to_atoms_lst(adspos, adsorbate_name)
                # loop over each adsorbate
                for adsorbate, surface_atoms in zip(adsorbate_lst, sites_surface_atoms):
                    joint_atoms, cluster_ids, adsorbate_ids = join_cluster_adsorbate(atoms, adsorbate)
                    joint_atoms_dict = ase_to_atoms_dict(joint_atoms)

                    # start from a private copy of the source document so
                    # that sites do not share nested lists
                    dct = copy.deepcopy(source_simulation)
                    # calculation originated from this:
                    dct["source_id"] = calc_id
                    dct["workflow_id"] = workflow_id
                    dct["atoms"] = joint_atoms_dict
                    dct["operations"] = [{"add_adsorbate": 1}]

                    dct["adsorbates"].append({
                        "atom_ids": adsorbate_ids,
                        "reference_id": reference_id,
                        "site_class": adsite_type,
                        "site_ids": surface_atoms.tolist(),
                    })
                    # input/output of the source run do not apply to the
                    # new structure, so both are replaced
                    dct["inp"] = {
                        "adsite_type": adsite_type,
                        "adsorbate": adsorbate_name,
                    }
                    dct["output"] = {"surface_atoms": surface_atoms.tolist()}

                    # getting only id for uploading simulations in chunks
                    dct["_id"] = _query_id_counter_and_increment('simulations', db)
                    simulations_chunk_list.append(dct)

                    # update internal workflow data
                    new_calc_ids.append(dct["_id"])

            # insert_many rejects an empty list, so skip clusters that
            # produced no sites
            if simulations_chunk_list:
                db["simulations"].insert_many(simulations_chunk_list)

        descmatrix = np.array(desc_lst)

        # saves descmatrix as a path to a numpy array
        update_spec["temp"]["descmatrix"] = write_descmatrix(descmatrix)
        update_spec["temp"]["calc_ids"] = new_calc_ids

        # tolerate specs that do not carry these bookkeeping keys
        update_spec.pop("_category", None)
        update_spec.pop("name", None)
        return FWAction(update_spec=update_spec)
Пример #5
0
    def run_task(self, fw_spec):
        """Gather reaction energies and final adsorption sites of a chunk.

        The analysis ids in ``fw_spec["temp"]["calc_analysis_ids_dict"]`` are
        put in the order of ``fw_spec["temp"]["calc_ids"]``. With a
        ``chunk_size`` of -1 they replace ``calc_ids`` entirely; otherwise
        they overwrite the slice
        ``calc_ids[n_calcs_started - chunk_size:n_calcs_started]``.

        For every analysed simulation the task

        * splits the structure into cluster and adsorbate, finds the site
          closest to the last adsorbate atom and compares it with the site
          stored in the first adsorbate entry,
        * writes site class, site ids and ``output.is_same_site`` back to the
          simulation document,
        * computes the reaction energy as the simulation's total energy minus
          the total energies of all simulations referenced by its adsorbates,
          nanoclusters and substrates.

        The per-calculation lists ``property``, ``is_converged_list`` and
        ``is_same_site_list`` in ``fw_spec["temp"]`` are updated, and
        ``calc_analysis_ids_dict`` and ``analysis_ids`` are reset.

        Args:
            fw_spec (dict): Firework spec. ``extdb_connect``,
                ``n_calcs_started`` and ``temp`` are read. The spec is
                modified in place and reused as the update spec.

        Returns:
            FWAction: action whose ``update_spec`` is the modified spec with
            the ``_category`` and ``name`` keys removed.
        """
        def _as_id_array(site):
            # A site made of a single atom may arrive as a bare integer;
            # wrap it so len(), set() and .tolist() work uniformly below.
            if isinstance(site, (int, np.integer)):
                return np.array([site])
            return site

        temp = fw_spec["temp"]
        calc_analysis_ids_dict = temp["calc_analysis_ids_dict"]
        chunk_size = int(self["chunk_size"])
        n_calcs_started = int(fw_spec["n_calcs_started"])
        calc_ids = temp["calc_ids"]
        n_calcs = len(calc_ids)
        reaction_energies_list = temp.get(
            "property", np.zeros(n_calcs).tolist())
        is_converged_list = temp.get(
            "is_converged_list", np.zeros(n_calcs).tolist())
        is_same_site_list = temp.get(
            "is_same_site_list", np.zeros(n_calcs).tolist())

        # reorder analysis ids so they follow the order of calc_ids;
        # analysis ids become part of calc_ids below
        analysis_ids = [
            calc_analysis_ids_dict[str(calc_id)]
            for calc_id in calc_ids
            if str(calc_id) in calc_analysis_ids_dict
        ]

        logging.debug("chunk_size %s %s", chunk_size, type(chunk_size))
        if chunk_size == -1:
            calc_ids = analysis_ids
            id_range = range(len(calc_ids))
        else:
            calc_ids[n_calcs_started -
                     chunk_size:n_calcs_started] = analysis_ids
            id_range = range(n_calcs_started - chunk_size, n_calcs_started)
        calc_ids_chunk = analysis_ids
        simulations = fetch_simulations(fw_spec["extdb_connect"],
                                        calc_ids_chunk)
        logging.info("Gather Properties of following calculations:")
        logging.info(calc_ids_chunk)

        ext_db = get_external_database(fw_spec["extdb_connect"])
        # compute reaction energy and store them as lists for ml
        logging.debug("id_range %s", id_range)
        for idx, calc_id in zip(id_range, calc_ids_chunk):
            simulation = simulations[str(calc_id)]

            # get closest site classified
            cluster_atoms, adsorbate_atoms, _, _, _, _ = \
                split_nanocluster_and_adsorbates(simulation)

            cluster = cluskit.Cluster(cluster_atoms)
            cluster.get_sites(-1)

            # assumes only one adsorbate
            final_position = adsorbate_atoms.get_positions()[-1]
            closest_sitetype, closest_site_id = cluster.find_closest_site(
                final_position)
            closest_site = _as_id_array(
                cluster.site_surface_atom_ids[closest_sitetype][
                    closest_site_id])

            adsorbates = simulation["adsorbates"]
            initial_site = _as_id_array(adsorbates[0].get("site_ids", []))

            logging.debug("closest_site %s initial_site %s",
                          closest_site, initial_site)
            logging.debug("closest_sitetype %s %s",
                          closest_sitetype, type(closest_sitetype))

            # same site when every atom of the closest site was already part
            # of the initial site
            is_same_site = not (set(closest_site) - set(initial_site))
            if not is_same_site:
                adsorbates[0]["site_ids"] = closest_site
                adsorbates[0]["site_class"] = closest_sitetype
                if len(initial_site) == 0:
                    logging.warning(
                        "No initial site information found! Could not verify if adsorbate moved to different adsorption site"
                    )
            logging.debug("calc_id %s is_same_site %s", calc_id, is_same_site)

            ext_db["simulations"].update_one({"_id": int(calc_id)}, {
                "$set": {
                    "adsorbates.0.site_class": int(closest_sitetype),
                    "adsorbates.0.site_ids": closest_site.tolist(),
                    "output.is_same_site": is_same_site
                }
            })

            is_converged_list[idx] = simulation["output"]["is_converged"]
            is_same_site_list[idx] = is_same_site
            logging.debug("is_converged %s idx %s",
                          is_converged_list[idx], idx)

            # start from the total energy of this simulation and subtract
            # the energy of every referenced component
            reaction_energy = simulation["output"].get("total_energy", 0.0)
            component_types = (
                adsorbates,
                simulation["nanoclusters"],
                simulation["substrates"],
            )
            logging.debug("energy before adding references %s",
                          reaction_energy)
            for components in component_types:
                for component in components:
                    reference_id = component["reference_id"]
                    try:
                        reference_simulation = simulations[str(reference_id)]
                    except LookupError:
                        # not part of this chunk
                        logging.info("getting reference from database")
                        reference_simulation = ext_db["simulations"].find_one(
                            {"_id": reference_id})
                    try:
                        total_energy = reference_simulation["output"][
                            "total_energy"]
                    except (KeyError, TypeError):
                        # TypeError covers a reference that was not found
                        # (find_one returned None)
                        logging.warning(
                            "total_energy not found! Not contributing to reaction energy!"
                        )
                        total_energy = 0.0
                    try:
                        reaction_energy -= float(total_energy)
                    except (TypeError, ValueError):
                        # non-numeric energy on either side of the subtraction
                        logging.warning("Energy not understood!")
                        logging.warning(total_energy)

                    logging.debug("%s reference %s",
                                  reaction_energy, reference_id)
            reaction_energies_list[idx] = reaction_energy

        temp["calc_analysis_ids_dict"] = {}
        temp["property"] = reaction_energies_list
        temp["is_converged_list"] = is_converged_list
        temp["is_same_site_list"] = is_same_site_list
        temp["analysis_ids"] = []
        temp["calc_ids"] = calc_ids
        logging.debug("reaction_energies_list %s (%s entries)",
                      reaction_energies_list, len(reaction_energies_list))
        logging.debug("is_converged_list %s", is_converged_list)
        logging.debug("calc_ids %s", calc_ids)

        # tolerate specs that do not carry these bookkeeping keys
        fw_spec.pop("_category", None)
        fw_spec.pop("name", None)
        return FWAction(update_spec=fw_spec)