Exemplo n.º 1
0
    def __init__(self,
                 base_dir,
                 db_file="db.json",
                 rxn_coll="reaxys",
                 mol_coll="molecules",
                 thermo_coll="thermo"):
        """
        Store the working directory and try to open the database collections.

        The database connection is best-effort: if it cannot be opened,
        ``self.db`` and the three collection attributes are set to None
        instead of raising.

        :param base_dir: Directory where input and output data should be stored.
        :param db_file: Path to database config file.
        :param rxn_coll: Name of database collection where reaction
        metadata is stored.
        :param mol_coll: Name of database collection where molecule
        calculations are stored.
        :param thermo_coll: Name of database collection where reaction
        thermodynamics calculations are stored.
        """

        self.base_dir = base_dir
        self.db_file = db_file

        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        try:
            self.db = QChemCalcDb.from_db_file(self.db_file)
        except Exception:
            self.db = None

        if self.db is not None:
            self.reactions = self.db.db[rxn_coll]
            self.molecules = self.db.db[mol_coll]
            self.thermo = self.db.db[thermo_coll]
        else:
            # Always define the collection attributes so that later access
            # yields None rather than an AttributeError.
            self.reactions = None
            self.molecules = None
            self.thermo = None
Exemplo n.º 2
0
    def store_epi_suite_db(entries, collection="episuite", db_file="db.json"):
        """
        Insert EPI Suite entries into a database collection, skipping
        entries whose mol_id is already present.

        :param entries: List of dictionaries, with each entry representing the
        EPI Suite output data for a molecule.
        :param collection: Database collection for EPI Suite data. Default is
        "episuite".
        :param db_file: Path to database config file. Default is "db.json".
        :return: list of mol_ids corresponding to all entries just added to
        collection.
        :raises ConnectionFailure: if the database cannot be opened from
        db_file.
        """

        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not reported as connection errors.
        try:
            db = QChemCalcDb.from_db_file(db_file)
        except Exception:
            raise ConnectionFailure(
                "db_file is invalid, or db listed cannot be"
                " connected to at this time.")

        epi_collection = db.db[collection]

        # The unique index makes a repeated mol_id raise DuplicateKeyError
        # on insertion, which is how duplicates are detected below.
        epi_collection.create_index("mol_id", unique=True)

        just_added = []

        for entry in entries:
            try:
                epi_collection.insert_one(entry)
            except DuplicateKeyError:
                continue
            just_added.append(entry["mol_id"])

        return just_added
Exemplo n.º 3
0
    def __init__(self,
                 base_dir,
                 subdirs=False,
                 reactant_pre="rct_",
                 product_pre="pro_",
                 db_file="db.json"):
        """
        Store the file-layout settings and try to open the database.

        The database connection is best-effort: if it cannot be opened,
        ``self.db`` is set to None instead of raising.

        :param base_dir: Directory where input and output data should be stored.
        :param subdirs: Is data all stored in one directory (False), or is it
        separated among subdirectories (True)?
        :param reactant_pre: Prefix for reactant files.
        :param product_pre: Prefix for product files.
        :param db_file: Path to database config file.
        """

        self.base_dir = base_dir
        self.subdirs = subdirs
        self.reactant_pre = reactant_pre
        self.product_pre = product_pre
        self.db_file = db_file

        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        try:
            self.db = QChemCalcDb.from_db_file(self.db_file)
        except Exception:
            self.db = None
Exemplo n.º 4
0
    def run_task(self, fw_spec):
        """
        Parse a QChem calculation directory and store the resulting task
        document.

        The document is inserted into the database when a db_file resolves
        through env_chk; otherwise it is dumped to "task.json" inside the
        calculation directory.

        :param fw_spec: Firework spec. Read for "calc_locs", "tags" and the
        key named by the optional "fw_spec_field" parameter.
        :return: FWAction with the task_id as stored data and, when an
        optimized molecule was parsed, spec updates for the molecule,
        Mulliken data and RESP or ESP data.
        """
        # default to the current directory unless told where the QChem
        # files live
        calc_dir = os.getcwd()
        if "calc_dir" in self:
            calc_dir = self["calc_dir"]
        elif self.get("calc_loc"):
            calc_loc = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])
            calc_dir = calc_loc["path"]

        parse_kwargs = {
            "path": calc_dir,
            "input_file": self.get("input_file", "mol.qin"),
            "output_file": self.get("output_file", "mol.qout"),
            "multirun": self.get("multirun", False),
        }

        logger.info("PARSING DIRECTORY: {}".format(calc_dir))

        # assimilate (i.e., parse) the directory into a task document
        drone = QChemDrone(
            additional_fields=self.get("additional_fields", []))
        task_doc = drone.assimilate(**parse_kwargs)

        if "tags" in fw_spec:
            task_doc.update({"tags": fw_spec["tags"]})

        # copy one extra fw_spec entry into the document when requested
        spec_field = self.get("fw_spec_field")
        if spec_field:
            task_doc.update({spec_field: fw_spec.get(spec_field)})

        # forward the final/optimized structure and charges to child fireworks
        update_spec = {}
        output = task_doc.get("output")
        if output.get("optimized_molecule"):
            update_spec["prev_calc_molecule"] = output["optimized_molecule"]
            update_spec["prev_calc_mulliken"] = output["mulliken"]
            if "RESP" in output:
                update_spec["prev_calc_resp"] = output["RESP"]
            elif "ESP" in output:
                update_spec["prev_calc_esp"] = output["ESP"]

        # db insertion when a db_file is configured, taskdoc dump otherwise
        db_file = env_chk(self.get("db_file"), fw_spec)
        if db_file:
            mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
            t_id = mmdb.insert(task_doc)
            logger.info("Finished parsing with task_id: {}".format(t_id))
        else:
            dump_path = os.path.join(calc_dir, "task.json")
            with open(dump_path, "w") as f:
                f.write(json.dumps(task_doc, default=DATETIME_HANDLER))

        stored_data = {"task_id": task_doc.get("task_id", None)}
        return FWAction(stored_data=stored_data, update_spec=update_spec)
Exemplo n.º 5
0
    def run_task(self, fw_spec):
        """
        Parse a QChem calculation directory and store the resulting task
        document.

        The document is inserted into the database when a db_file resolves
        through env_chk; otherwise it is dumped to "task.json" inside the
        calculation directory.

        :param fw_spec: Firework spec. Read for "calc_locs" and the key named
        by the optional "fw_spec_field" parameter.
        :return: FWAction with the task_id as stored data and, when an
        optimized molecule was parsed, a spec update carrying that molecule.
        """
        # default to the current directory unless told where the QChem
        # files live
        calc_dir = os.getcwd()
        if "calc_dir" in self:
            calc_dir = self["calc_dir"]
        elif self.get("calc_loc"):
            calc_loc = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])
            calc_dir = calc_loc["path"]

        parse_kwargs = {
            "path": calc_dir,
            "input_file": self.get("input_file", "mol.qin"),
            "output_file": self.get("output_file", "mol.qout"),
            "multirun": self.get("multirun", False),
        }

        logger.info("PARSING DIRECTORY: {}".format(calc_dir))

        # assimilate (i.e., parse) the directory into a task document
        drone = QChemDrone(additional_fields=self.get("additional_fields"))
        task_doc = drone.assimilate(**parse_kwargs)

        # merge the fw_spec entry named by fw_spec_field into the document
        spec_field = self.get("fw_spec_field")
        if spec_field:
            task_doc.update(fw_spec[spec_field])

        # forward the final/optimized structure to child fireworks
        update_spec = {}
        output = task_doc.get("output")
        if output.get("optimized_molecule"):
            update_spec["prev_calc_molecule"] = output["optimized_molecule"]

        # db insertion when a db_file is configured, taskdoc dump otherwise
        db_file = env_chk(self.get("db_file"), fw_spec)
        if db_file:
            mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
            t_id = mmdb.insert(task_doc)
            logger.info("Finished parsing with task_id: {}".format(t_id))
        else:
            dump_path = os.path.join(calc_dir, "task.json")
            with open(dump_path, "w") as f:
                f.write(json.dumps(task_doc, default=DATETIME_HANDLER))

        stored_data = {"task_id": task_doc.get("task_id", None)}
        return FWAction(stored_data=stored_data, update_spec=update_spec)
Exemplo n.º 6
0
    def test_no_opt_Fragmentation(self):
        """
        Load the sb40.json entries into the database, run a depth-1
        fragmentation workflow on EC with charge -1 and without optimization,
        and check that the patched FWAction was last called with no additions.
        """
        db_file = os.path.join(db_dir, "db.json")
        mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
        # Reset the database even when the test fails, so the inserted
        # entries cannot leak into other tests.
        try:
            with open(
                    os.path.join(module_dir, "..", "..", "test_files",
                                 "sb40.json")) as f:
                tmp = json.load(f)
            for entry in tmp:
                mmdb.insert(entry)
            with patch("atomate.qchem.firetasks.fragmenter.FWAction"
                       ) as FWAction_patch:
                mock_FWAction = MagicMock()
                FWAction_patch.return_value = mock_FWAction
                mock_FWAction.as_dict.return_value = {
                    "stored_data": {},
                    "exit": False,
                    "update_spec": {},
                    "mod_spec": [],
                    "additions": [],
                    "detours": [],
                    "defuse_children": False,
                    "defuse_workflow": False,
                }

                # define starting molecule and workflow object
                initial_mol = Molecule.from_file(
                    os.path.join(module_dir, "..", "..", "test_files",
                                 "top_11", "EC.xyz"))
                initial_mol.set_charge_and_spin(charge=-1)
                wf = get_fragmentation_wf(
                    molecule=initial_mol,
                    depth=1,
                    pcm_dielectric=40.0,
                    do_optimization=False,
                    check_db=True,
                )
                self.lp.add_wf(wf)
                rapidfire(
                    self.lp,
                    fworker=FWorker(env={
                        "max_cores": 24,
                        "db_file": db_file
                    }),
                    pdb_on_exception=True,
                )

                self.assertEqual(
                    len(FWAction_patch.call_args[1]["additions"]), 0)
        finally:
            mmdb.reset()
Exemplo n.º 7
0
 def test_in_database_and_EC_neg_frag(self):
     """
     Load the sb40.json entries into the database, run FragmentMolecule on
     self.neg_ec with check_db enabled, and check that 7 unique fragments
     are found while the patched FWAction receives no additions.
     """
     db_file = os.path.join(db_dir, "db.json")
     mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
     # Reset the database even when the test fails, so the inserted
     # entries cannot leak into other tests.
     try:
         with open(os.path.join(module_dir, "..", "..", "test_files",
                                "sb40.json")) as f:
             tmp = json.load(f)
         for entry in tmp:
             mmdb.insert(entry)
         with patch("atomate.qchem.firetasks.fragmenter.FWAction"
                    ) as FWAction_patch:
             ft = FragmentMolecule(
                 molecule=self.neg_ec,
                 depth=1,
                 qchem_input_params={"pcm_dielectric": 40.0},
                 check_db=True,
                 db_file=db_file)
             ft.run_task({})
             self.assertEqual(ft.check_db, True)
             frags = ft.unique_fragments
             self.assertEqual(len(frags), 7)
             self.assertEqual(
                 len(FWAction_patch.call_args[1]["additions"]), 0)
     finally:
         mmdb.reset()
Exemplo n.º 8
0
 def test_in_database_and_EC_neg_frag(self):
     """
     Load the sb40.json entries into the database, run FragmentMolecule on
     self.neg_ec with check_db enabled, and check that 7 unique fragments
     are found while the patched FWAction receives no additions.
     """
     db_file = os.path.join(db_dir, "db.json")
     mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
     # Reset the database even when the test fails, so the inserted
     # entries cannot leak into other tests.
     try:
         with open(os.path.join(module_dir, "..", "..", "test_files",
                                "sb40.json")) as f:
             tmp = json.load(f)
         for entry in tmp:
             mmdb.insert(entry)
         with patch("atomate.qchem.firetasks.fragmenter.FWAction"
                    ) as FWAction_patch:
             ft = FragmentMolecule(
                 molecule=self.neg_ec,
                 depth=1,
                 qchem_input_params={"pcm_dielectric": 40.0},
                 check_db=True,
                 db_file=db_file)
             ft.run_task({})
             self.assertEqual(ft.check_db, True)
             frags = ft.unique_fragments
             self.assertEqual(len(frags), 7)
             self.assertEqual(
                 len(FWAction_patch.call_args[1]["additions"]), 0)
     finally:
         mmdb.reset()
Exemplo n.º 9
0
    def store_reaxys_reactions_db(reactions,
                                  db_file="db.json",
                                  collection_name="reaxys"):
        """
        Insert reaction information into a MongoDB database.

        Each reaction is read through its "meta" entry, using the keys
        "rxn_id", "pro_meta", "rct_meta" and "solvents". Reactions whose
        rxn_id is already in the collection are skipped.

        :param reactions: List of reactions, defined as above.
        :param db_file: A config file indicating the database into which the
        reactions will be inserted.
        :param collection_name: Collection within the database in which to store
        the reactions.
        :return: List of rxn_ids that were added to collection collection_name
        as a result of this method call
        :raises RuntimeError: if the database cannot be opened from db_file.
        """

        # Set up MongoDB database with pymatgen-db and pymongo.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not reported as connection errors.
        try:
            db = QChemCalcDb.from_db_file(db_file)
        except Exception:
            raise RuntimeError("Cannot connect to database. Please check your"
                               " configuration and try again.")

        collection = db.db[collection_name]

        just_added = []

        # The unique index makes a repeated rxn_id raise DuplicateKeyError
        # on insertion, which is how duplicates are detected below.
        collection.create_index("rxn_id", unique=True)

        for reaction in reactions:
            meta = reaction["meta"]

            # Reorganize for database insertion
            rxn = {
                "rxn_id": str(meta["rxn_id"]),
                "pro_ids": [str(p[0]) for p in meta["pro_meta"]],
                "pro_names": [p[1] for p in meta["pro_meta"]],
                "rct_ids": [str(r[0]) for r in meta["rct_meta"]],
                "rct_names": [r[1] for r in meta["rct_meta"]],
                "solvents": list(meta["solvents"]),
            }

            try:
                collection.insert_one(rxn)
            except DuplicateKeyError:
                continue
            just_added.append(rxn["rxn_id"])

        return just_added
Exemplo n.º 10
0
    def query_db_entries(db_file,
                         db_collection="mol_builder",
                         num_entries=None):
        """
        Query a (Sam's) database to pull all the molecules from molecule builder.

        Args:
            db_file (str): a json file storing the info of the database.
            db_collection (str): which database to query. Options are
                `mol_builder` and `task`.
            num_entries (int): the number of entries to query, if `None`, get all.

        Returns:
            A list of db entries.

        Raises:
            ValueError: if `db_collection` is not one of the recognized options.
        """

        # Validate the argument before connecting, so that a bad value fails
        # fast without opening a database connection.
        if db_collection == "mol_builder":
            query = None  # no filter: fetch every document
        elif db_collection == "task":
            query = {"tags.class": "smd_production"}
        else:
            raise ValueError(
                "Unrecognized db_collection = {}".format(db_collection))

        logger.info("Start querying database...")

        mmdb = QChemCalcDb.from_db_file(db_file, admin=True)

        cursor = mmdb.collection.find(query)
        if num_entries is not None:
            cursor = cursor.limit(num_entries)

        entries = list(cursor)
        logger.info(
            "Finish fetching {} entries of database from query...".format(
                len(entries)))

        return entries
Exemplo n.º 11
0
    def __init__(self,
                 base_dir,
                 reactant_pre="rct_",
                 product_pre="pro_",
                 db_file="db.json"):
        """
        Store the file-layout settings and try to open the database.

        The database connection is best-effort: if it cannot be opened,
        ``self.db`` is set to None instead of raising.

        :param base_dir: Directory where input and output data should be stored.
        :param reactant_pre: Prefix for reactant files.
        :param product_pre: Prefix for product files.
        :param db_file: Path to database config file.
        """

        self.base_dir = base_dir
        self.reactant_pre = reactant_pre
        self.product_pre = product_pre
        self.db_file = db_file

        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        try:
            self.db = QChemCalcDb.from_db_file(self.db_file)
        except Exception:
            self.db = None
Exemplo n.º 12
0
    def test_no_opt_Fragmentation(self):
        """
        Load the sb40.json entries into the database, run a depth-1
        fragmentation workflow on EC with charge -1 and without optimization,
        and check that the patched FWAction was last called with no additions.
        """
        db_file = os.path.join(db_dir, "db.json")
        mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
        # Reset the database even when the test fails, so the inserted
        # entries cannot leak into other tests.
        try:
            with open(os.path.join(module_dir, "..", "..", "test_files",
                                   "sb40.json")) as f:
                tmp = json.load(f)
            for entry in tmp:
                mmdb.insert(entry)
            with patch("atomate.qchem.firetasks.fragmenter.FWAction"
                       ) as FWAction_patch:
                mock_FWAction = MagicMock()
                FWAction_patch.return_value = mock_FWAction
                mock_FWAction.as_dict.return_value = {
                    'stored_data': {},
                    'exit': False,
                    'update_spec': {},
                    'mod_spec': [],
                    'additions': [],
                    'detours': [],
                    'defuse_children': False,
                    'defuse_workflow': False,
                }

                # define starting molecule and workflow object
                initial_mol = Molecule.from_file(
                    os.path.join(module_dir, "..", "..", "test_files",
                                 "top_11", "EC.xyz"))
                initial_mol.set_charge_and_spin(charge=-1)
                wf = get_fragmentation_wf(
                    molecule=initial_mol,
                    depth=1,
                    pcm_dielectric=40.0,
                    do_optimization=False,
                    check_db=True)
                self.lp.add_wf(wf)
                rapidfire(
                    self.lp,
                    fworker=FWorker(env={"max_cores": 24, "db_file": db_file}),
                    pdb_on_exception=True)

                self.assertEqual(
                    len(FWAction_patch.call_args[1]["additions"]), 0)
        finally:
            mmdb.reset()
Exemplo n.º 13
0
    def run_task(self, fw_spec):
        """
        Fragment a molecule and return an FWAction whose additions come from
        self._build_new_FWs().

        The molecule is taken from fw_spec["prev_calc_molecule"] when set,
        otherwise from the optional "molecule" parameter.

        :param fw_spec: Firework spec. Read for "prev_calc_molecule" and
        passed to env_chk to resolve db_file.
        :return: FWAction with additions=self._build_new_FWs().
        :raises KeyError: if no molecule is available from either source.
        """
        # a molecule passed through fw_spec wins over the optional parameter
        molecule = fw_spec.get("prev_calc_molecule") or self.get("molecule")
        if not molecule:
            raise KeyError(
                "No molecule present, add as an optional param or check fw_spec"
            )

        self.depth = self.get("depth", 1)
        additional_charges = self.get("additional_charges", [])
        self.do_triplets = self.get("do_triplets", False)
        self.linked = self.get("linked", True)
        self.qchem_input_params = self.get("qchem_input_params", {})

        # Charges to consider depend on the charge of the principle molecule
        charge = molecule.charge
        if charge == 0:
            self.charges = [-1, 0, 1]
        elif charge > 0:
            self.charges = [charge - 1, charge]
        else:
            self.charges = [charge, charge + 1]
        self.principle_charge = molecule.charge

        # Append user-specified charges that are not already in the list
        for extra in additional_charges:
            if extra in self.charges:
                print("Charge " + str(extra) + " already present!")
            else:
                print("Adding additional charge " + str(extra))
                self.charges.append(extra)

        # Obtain fragments from Pymatgen's fragmenter and flatten them
        fragmenter = Fragmenter(molecule=molecule,
                                edges=self.get("edges", None),
                                depth=self.depth,
                                open_rings=self.get("open_rings", True),
                                opt_steps=self.get("opt_steps", 10000))
        self.unique_fragments = [
            frag
            for frags in fragmenter.unique_frag_dict.values()
            for frag in frags
        ]

        # Convert fragment molecule graphs into molecule objects with the
        # charges given in self.charges
        self._build_unique_relevant_molecules()

        # Collect each reduced formula once, in first-seen order, to make
        # the database search easier
        self.unique_formulae = []
        for mol in self.unique_molecules:
            formula = mol.composition.reduced_formula
            if formula not in self.unique_formulae:
                self.unique_formulae.append(formula)

        # Build the query used to look up already-calculated fragments
        find_dict = {"formula_pretty": {"$in": self.unique_formulae}}
        if "pcm_dielectric" in self.qchem_input_params:
            dielectric = self.qchem_input_params["pcm_dielectric"]
            find_dict["calcs_reversed.input.solvent.dielectric"] = str(
                dielectric)

        # check_db defaults to True exactly when a db_file resolves
        db_file = env_chk(self.get("db_file"), fw_spec)
        self.check_db = self.get("check_db", bool(db_file))
        self.all_relevant_docs = []
        if db_file and self.check_db:
            mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
            projection = {"formula_pretty": 1, "input.initial_molecule": 1}
            self.all_relevant_docs = list(
                mmdb.collection.find(find_dict, projection))

        # The new fireworks are built by self._build_new_FWs()
        new_fws = self._build_new_FWs()
        return FWAction(additions=new_fws)
Exemplo n.º 14
0
    def run_task(self, fw_spec):
        """
        Parse a QChem calculation directory, store the resulting task
        document, and decide whether child fireworks should be defused.

        The document is inserted into the database when a db_file resolves
        through env_chk; otherwise it is dumped to "task.json" inside the
        calculation directory.

        :param fw_spec: Firework spec. Read for "calc_locs" and the key named
        by the optional "fw_spec_field" parameter.
        :return: FWAction with the task_id as stored data, the
        defuse_children flag, and a spec update carrying the optimized
        molecule when one was parsed.
        :raises RuntimeError: if the task state is not "successful" and the
        "defuse_unsuccessful" setting is "fizzle" or an unknown value.
        """
        # default to the current directory unless told where the QChem
        # files live
        calc_dir = os.getcwd()
        if "calc_dir" in self:
            calc_dir = self["calc_dir"]
        elif self.get("calc_loc"):
            calc_loc = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])
            calc_dir = calc_loc["path"]

        parse_kwargs = {
            "path": calc_dir,
            "input_file": self.get("input_file", "mol.qin"),
            "output_file": self.get("output_file", "mol.qout"),
            "multirun": self.get("multirun", False),
        }

        logger.info("PARSING DIRECTORY: {}".format(calc_dir))

        # assimilate (i.e., parse) the directory into a task document
        drone = QChemDrone(additional_fields=self.get("additional_fields"))
        task_doc = drone.assimilate(**parse_kwargs)

        # merge the fw_spec entry named by fw_spec_field into the document
        spec_field = self.get("fw_spec_field")
        if spec_field:
            task_doc.update(fw_spec[spec_field])

        # forward the final/optimized structure to child fireworks
        update_spec = {}
        output = task_doc.get("output")
        if output.get("optimized_molecule"):
            update_spec["prev_calc_molecule"] = output["optimized_molecule"]

        # db insertion when a db_file is configured, taskdoc dump otherwise
        db_file = env_chk(self.get("db_file"), fw_spec)
        if db_file:
            mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
            t_id = mmdb.insert(task_doc)
            logger.info("Finished parsing with task_id: {}".format(t_id))
        else:
            dump_path = os.path.join(calc_dir, "task.json")
            with open(dump_path, "w") as f:
                f.write(json.dumps(task_doc, default=DATETIME_HANDLER))

        # an unsuccessful job either defuses children, is ignored, or raises,
        # depending on the defuse_unsuccessful setting
        defuse_children = False
        if task_doc["state"] != "successful":
            policy = self.get("defuse_unsuccessful", DEFUSE_UNSUCCESSFUL)
            if policy is True:
                defuse_children = True
            elif policy is not False:
                if policy == "fizzle":
                    raise RuntimeError(
                        "QChemToDb indicates that job is not successful "
                        "(perhaps your job did not converge within the "
                        "limit of electronic iterations)!")
                raise RuntimeError("Unknown option for defuse_unsuccessful: "
                                   "{}".format(policy))

        stored_data = {"task_id": task_doc.get("task_id", None)}
        return FWAction(stored_data=stored_data,
                        defuse_children=defuse_children,
                        update_spec=update_spec)