Example #1
    def process_outputs(self, result: Result):
        # Get basic task information
        smiles, = result.args

        # Release nodes for use by other processes
        self.rec.release("simulation", 1)

        # If successful, add to the database
        if result.success:
            # Mark that we've had another complete result
            self.n_evaluated += 1
            self.logger.info(f'Success! Finished screening {self.n_evaluated}/{self.n_to_evaluate} molecules')

            # Determine whether to start re-training
            if self.n_evaluated % self.n_complete_before_retrain == 0:
                if self.update_in_progress.is_set():
                    self.logger.info(f'Waiting until previous training run completes.')
                else:
                    self.logger.info(f'Starting retraining.')
                    self.start_training.set()
            self.logger.info(f'{self.n_complete_before_retrain - self.n_evaluated % self.n_complete_before_retrain} results needed until we re-train again')

            # Store the data in a molecule data object
            data = MoleculeData.from_identifier(smiles=smiles)
            opt_records, hess_records = result.value
            for r in opt_records:
                data.add_geometry(r)
            for r in hess_records:
                data.add_single_point(r)
            data.update_thermochem()
            apply_recipes(data)

            # Attach the data source for the molecule
            data.subsets.append(self.search_space_name)
            
            # Add the IPs to the result object
            result.task_info["ip"] = data.oxidation_potential.copy()

            # Add to database
            with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
                print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
            self.database.update_molecule(data)

            # Write to disk
            with open(self.output_dir.joinpath('qcfractal-records.json'), 'a') as fp:
                for r in opt_records + hess_records:
                    print(r.json(), file=fp)
            self.logger.info(f'Added complete calculation for {smiles} to database.')
            
            # Check whether all molecules have been screened
            if self.n_evaluated >= self.n_to_evaluate:
                self.logger.info(f'No more molecules left to screen')
                self.done.set()
        else:
            self.logger.info(f'Computations failed for {smiles}. Check JSON file for stacktrace')

        # Write out the result to disk
        with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
            print(result.json(exclude={'value'}), file=fp)
        self.logger.info(f'Processed simulation task.')
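
Each write above appends a single JSON line, so the log files can be replayed later without the database. A minimal reader sketch (the function below is illustrative and not part of the original code; it only assumes the [timestamp, molecule-json] line layout written by the method above):

import json

def load_molecule_log(path: str) -> list:
    """Replay the append-only log; each line holds ``[timestamp, molecule-json-string]``."""
    records = []
    with open(path) as fp:
        for line in fp:
            timestamp, mol_json = json.loads(line)
            records.append((timestamp, json.loads(mol_json)))
    return records
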
Example #2
    def get_next_step(self, record: MoleculeData) -> Optional[str]:
        """Get the next fidelity level for a certain molecule given what we know about it

        Args:
            record: Molecule to be evaluated
        Returns:
            The name of the next level of fidelity needed for this computation. ``None`` if all have been completed
        """

        # Make sure all of our evaluations are up-to-date
        apply_recipes(record)

        # Get the appropriate property we are looking through
        data = record.reduction_potential if self.oxidation_state == OxidationState.REDUCED \
            else record.oxidation_potential

        # If the highest level is found, return None as we're done
        if self.levels[-1] in data:
            return None

        # Get the first level to be found in the molecule
        current_level = None
        for level in self.levels[::-1]:
            if level in data:
                current_level = level
                break

        # If no level has been completed, return the first level
        if current_level is None:
            return self.levels[0]

        # Otherwise, return the next level in the chain
        return self.levels[self.levels.index(current_level) + 1]
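
The traversal above reduces to a simple rule: if the top of the chain is already present, stop; otherwise find the highest completed level and return its successor, falling back to the first level when nothing has run yet. A standalone sketch of that rule (the level names and completed-data dictionary below are illustrative assumptions):

from typing import Optional

def next_level(levels: list, completed: dict) -> Optional[str]:
    """Return the next fidelity level to run, or ``None`` once the chain is finished."""
    if levels[-1] in completed:  # Highest fidelity already available
        return None
    for level in reversed(levels):  # Find the highest completed level
        if level in completed:
            return levels[levels.index(level) + 1]
    return levels[0]  # Nothing completed yet: start at the bottom of the chain

# With a three-step chain, a molecule with only the first level done gets the second
assert next_level(['low', 'medium', 'high'], {'low': 1.23}) == 'medium'
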
Example #3
    def get_current_step(self, record: MoleculeData) -> str:
        """Get the current level of fidelity for a certain molecule

        Args:
            record: Molecule to be evaluated
        Returns:
            The name of the highest level achieved so far, or "base" if the molecule has yet to be assessed
        """

        # Make sure all of our evaluations are up-to-date
        apply_recipes(record)

        # Get the appropriate property we are looking through
        data = record.reduction_potential if self.oxidation_state == OxidationState.REDUCED \
            else record.oxidation_potential

        # Get the current level
        for level in self.levels[::-1]:
            if level in data:
                return level
        return 'base'
Example #4
    def record_qc(self, result: Result):
        # Get basic task information
        smiles, = result.args
        
        # Release nodes for use by other processes
        self.rec.release("simulation", self.nodes_per_qc)

        # If successful, add to the database
        if result.success:
            # Store the data in a molecule data object
            data = MoleculeData.from_identifier(smiles=smiles)
            opt_records, hess_records = result.value
            for r in opt_records:
                data.add_geometry(r)
            for r in hess_records:
                data.add_single_point(r)
            apply_recipes(data)  # Compute the IP

            # Add to database
            with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
                print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
            self.database.append(data)
            
            # If the database is complete, set "done"
            if len(self.database) >= self.target_size:
                self.logger.info(f'Database has reached target size of {len(self.database)}. Exiting')
                self.done.set()

            # Write to disk
            with open(self.output_dir.joinpath('qcfractal-records.json'), 'a') as fp:
                for r in opt_records + hess_records:
                    print(r.json(), file=fp)
            self.logger.info(f'Added complete calculation for {smiles} to database.')
        else:
            self.logger.info(f'Computations failed for {smiles}. Check JSON file for stacktrace')

        # Write out the result to disk
        with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
            print(result.json(exclude={'value'}), file=fp)
Example #5
    def update_molecule(self, molecule: MoleculeData) -> UpdateResult:
        """Update the data for a single molecule

        Args:
            molecule: Data for a certain molecule to be updated.
                All fields specified in this record will be updated or added to the matching document.
                No fields will be deleted by this operation.
        Returns:
            An update result
        """

        # Double-check the format
        MoleculeData.validate(molecule)

        # Update the derived properties
        molecule.update_thermochem()
        apply_recipes(molecule)

        # Generate the update and send it to the database
        update_record = generate_update(molecule)
        return self.collection.update_one({'key': molecule.key},
                                          update_record,
                                          upsert=True)
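
The body of generate_update is not shown in this excerpt. One plausible sketch, assuming the record serializes to JSON and only populated fields should be written as a MongoDB "$set" document (this helper is an assumption, not the project's actual implementation):

import json

def generate_update(molecule) -> dict:
    # Keep only populated fields so the update adds or overwrites data
    # without clearing anything already stored in the matching document
    fields = json.loads(molecule.json())
    return {'$set': {key: value for key, value in fields.items() if value is not None}}

Because update_one is called with upsert=True, the same call inserts the document the first time a molecule is seen and patches it on every later update.
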
Example #6
    def record_qc(self, result: Result):
        # Get basic task information
        inchi = result.task_info['inchi']
        self.logger.info(f'{result.method} computation for {inchi} finished')

        # Release nodes for use by other processes
        self.rec.release("simulation", self.nodes_per_qc)

        # If successful, add to the database
        if result.success:
            self.n_evaluated += 1

            # Check if we are done
            if self.n_evaluated >= self.n_to_evaluate:
                self.logger.info('We have evaluated as many molecules as requested. Exiting')
                self.done.set()

            # Store the data in a molecule data object
            data = self.database.get_molecule_record(inchi=inchi)  # Get existing information
            opt_records, spe_records = result.value
            for r in opt_records:
                data.add_geometry(r, overwrite=True)
            for r in spe_records:
                data.add_single_point(r)
            apply_recipes(data)  # Compute the IP

            # Add ionization potentials to the task_info
            result.task_info['ips'] = data.oxidation_potential

            # Add to database
            with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
                print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
            self.database.update_molecule(data)

            # Check whether the highest fidelity level has been reached for this molecule
            if self.output_property.split(".")[-1] in data.oxidation_potential:
                self.until_retrain -= 1
                self.logger.info(f'High fidelity complete. {self.until_retrain} before retraining')
            else:
                self.to_reevaluate.append(data)
                self.until_reevaluate -= 1
                self.logger.info(f'Low fidelity complete. {self.until_reevaluate} before re-ordering')

            # Check if we should re-do training
            if self.until_retrain <= 0 and not self.done.is_set():
                # If we have enough new high-fidelity results, start retraining
                self.logger.info('Triggering training to start')
                self.start_training.set()
            elif self.until_reevaluate <= 0 and not (self.start_training.is_set() or self.done.is_set()):
                # Restart inference if we have had enough complete computations
                self.logger.info('Triggering inference to begin again')
                self.start_inference.set()

            # Write to disk
            with open(self.output_dir.joinpath('qcfractal-records.json'), 'a') as fp:
                for r in opt_records + spe_records:
                    print(r.json(), file=fp)
            self.logger.info(f'Added complete calculation for {inchi} to database.')
        else:
            self.logger.info(f'Computations failed for {inchi}. Check JSON file for stacktrace')

        # Write out the result to disk
        with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
            print(result.json(exclude={'value'}), file=fp)
Example #7
    def process_outputs(self, result: Result):
        # Release nodes for use by other processes
        self.rec.release("simulation", 1)

        # Unpack the task information
        inchi = result.task_info['inchi']
        method = result.method
        level = result.task_info['level']

        # If successful, add to the database
        self.logger.info(f'Completed {method} at {level} for {inchi}')
        if result.success:
            # Store the data in a molecule data object
            data = self.database.get_molecule_record(inchi=inchi)
            if method == 'relax_structure':
                data.add_geometry(result.value)
            else:
                data.add_single_point(result.value)
            data.update_thermochem()
            apply_recipes(data)

            # If there are still more computations left to complete a level, re-add it to the priority queue
            # This happens only if a new geometry was created
            cur_recipe = get_recipe_by_name(result.task_info['level'])
            try:
                to_run = cur_recipe.get_required_calculations(
                    data, self.search_spec.oxidation_state)
            except KeyError:
                to_run = []
            if len(to_run) > 0 and result.method == 'relax_structure':
                self.logger.info(
                    'Not yet done with the recipe. Re-adding to task queue')
                self.task_queue.put(
                    _PriorityEntry(
                        inchi=inchi,
                        item=result.task_info,
                        score=-np.inf  # Put it at the front of the queue
                    ))
            elif len(to_run) == 0:
                # Mark that we've had another complete result
                self.n_evaluated += 1
                self.logger.info(
                    f'Success! Finished screening {self.n_evaluated}/{self.n_to_evaluate} molecules'
                )

                # Determine whether to start re-training
                if self.n_evaluated % self.n_complete_before_retrain == 0:
                    if self.update_in_progress.is_set():
                        self.logger.info(
                            f'Waiting until previous training run completes.')
                    else:
                        self.logger.info(f'Starting retraining.')
                        self.start_training.set()
                self.logger.info(
                    f'{self.n_complete_before_retrain - self.n_evaluated % self.n_complete_before_retrain}'
                    ' results needed until we re-train again')

            # Attach the data source for the molecule
            data.subsets.append(self.search_space_name)

            # Add the IPs to the result object
            result.task_info["ip"] = data.oxidation_potential.copy()
            result.task_info["ea"] = data.reduction_potential.copy()

            # Add to database
            with open(self.output_dir.joinpath('moldata-records.json'),
                      'a') as fp:
                print(json.dumps([datetime.now().timestamp(),
                                  data.json()]),
                      file=fp)
            self.database.update_molecule(data)

            # Write to disk
            with gzip.open('qcfractal-records.json.gz', 'at') as fp:
                print(result.value.json(), file=fp)
            self.logger.info(
                f'Added complete calculation for {inchi} to database.')
        else:
            self.failed_molecules.add(inchi)
            self.logger.info(
                f'Computations failed for {inchi}. Check JSON file for stacktrace'
            )

        # Write out the result to disk
        result.task_info['inputs'] = str(result.inputs)
        with open(self.output_dir.joinpath('simulation-results.json'),
                  'a') as fp:
            print(result.json(exclude={'inputs', 'value'}), file=fp)
        self.logger.info(f'Processed simulation task.')
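
_PriorityEntry is not defined in this excerpt. A plausible definition, assuming task_queue is a queue.PriorityQueue (a min-heap), is a dataclass ordered by score alone, so that score=-np.inf sorts an entry to the front (the class below is an assumption, not necessarily the project's definition):

from dataclasses import dataclass, field

@dataclass(order=True)
class _PriorityEntry:
    score: float  # Only the score participates in ordering
    inchi: str = field(compare=False)
    item: dict = field(compare=False)

Marking inchi and item with compare=False keeps heap comparisons from ever touching the payload, even when two entries share the same score.
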
Example #8
    def record_qc(self, result: Result):
        # Get basic task information
        inchi = result.task_info['inchi']
        self.logger.info(f'{result.method} computation for {inchi} finished')
        
        # Check if it failed due to a ManagerLost exception
        if result.failure_info is not None and \
            'Task failure due to loss of manager' in result.failure_info.exception:
            # If so, resubmit it
            self.logger.info('Task failed due to manager loss. Resubmitting, as this task could still succeed')
            self.queues.send_inputs(*result.args, input_kwargs=result.kwargs, task_info=result.task_info,
                                    method=result.method, keep_inputs=True, topic='simulate')
            return

        # Release nodes for use by other processes
        self.rec.release("simulation", self.nodes_per_qc)

        # If successful, add to the database
        if result.success:
            self.n_evaluated += 1

            # Check if we are done
            if self.n_evaluated >= self.n_to_evaluate:
                self.logger.info('We have evaluated as many molecules as requested. Exiting')
                self.done.set()

            # Write outputs to disk
            opt_records, spe_records = result.value
            with open(self.output_dir.joinpath('..', '..', 'qcfractal-records.json'), 'a') as fp:
                for r in opt_records + spe_records:
                    r.extras['inchi'] = inchi
                    print(r.json(), file=fp)

            # Store the data in a molecule data object
            data = self.database.get_molecule_record(inchi=inchi)  # Get existing information
            store_success = False
            try:
                for r in opt_records:
                    data.add_geometry(r, overwrite=True)
                for r in spe_records:
                    data.add_single_point(r)
                store_success = True
            except UnmatchedGeometry:
                self.logger.warning(f'Failed to match {inchi} geometry to an existing record.'
                                    ' Tell Logan his hashes are broken again!')
            apply_recipes(data)  # Compute the IP

            # Add ionization potentials to the task_info
            result.task_info['ips'] = data.oxidation_potential
            result.task_info['eas'] = data.reduction_potential

            # Add to database
            with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
                print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
            self.database.update_molecule(data)

            # Mark if we have completed a new record of the output property
            outputs = data.oxidation_potential if self.oxidize else data.reduction_potential
            if self.target_recipe.name in outputs:  # All SPE are complete
                self.until_retrain -= 1
                self.logger.info(f'High fidelity complete. {self.until_retrain} before retraining')
            elif result.task_info['method'] != "compute_single_point" and store_success:
                self.until_reevaluate -= 1
                self.logger.info(f'Low fidelity complete. {self.until_reevaluate} before re-ordering')
                if result.method == 'compute_vertical':
                    self.to_reevaluate['adiabatic'].append(data)
                else:
                    self.to_reevaluate['normal'].append(data)

            # Check if we should re-do training or re-run inference
            if self.until_retrain <= 0 and not self.done.is_set():
                # If we have enough new high-fidelity results, start retraining
                self.logger.info('Triggering training to start')
                self.start_training.set()
            elif self.until_reevaluate <= 0 and not (self.start_training.is_set() or self.done.is_set()):
                # Restart inference if we have had enough complete computations
                self.logger.info('Triggering inference to begin again')
                self.start_inference.set()

            self.logger.info(f'Added complete calculation for {inchi} to database.')
        else:
            self.logger.info(f'Computations failed for {inchi}. Check JSON file for stacktrace')

        # Write out the result to disk
        with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
            print(result.json(exclude={'value'}), file=fp)