def restart_torsiondrives(self, torsiondrive_ids: List[int], client: FractalClient) -> None:
    """
    Request a restart of every torsiondrive service listed in ``torsiondrive_ids``.

    One ``modify_services`` call is issued per id, in the order the ids are given.
    Nothing is sent when the list is empty.
    """
    operation = "restart"
    for service_id in torsiondrive_ids:
        client.modify_services(operation, procedure_id=service_id)
def restart_optimizations(self, optimization_ids: List[int], client: FractalClient) -> None:
    """
    Request a restart of the task behind every optimization in ``optimization_ids``.

    One ``modify_tasks`` call is issued per id, in the order the ids are given.
    Nothing is sent when the list is empty.
    """
    for record_id in optimization_ids:
        client.modify_tasks(operation="restart", base_result=record_id)
def __init__(self, coll_name: str, qc_spec: str, base_class: Type[Collection] = Dataset,
             address: str = "localhost:7874", qca_passwd: Optional[str] = None,
             create: bool = False):
    """Connect to a QCFractal server and open the named collection.

    Args:
        coll_name: Name of the collection holding the data
        qc_spec: Name of the QC specification
        base_class: Type of the collection
        address: Address for the QCFractal server
        qca_passwd: Password for the QCFractal server. When omitted, the value of
            the ``QCAUSR`` environment variable is used (which may also be unset).
        create: Whether creating a new collection is acceptable

    Raises:
        KeyError: Re-raised from ``base_class.from_server`` when the lookup fails
            and ``create`` is false.
    """
    password = qca_passwd if qca_passwd is not None else os.environ.get("QCAUSR")

    self.qc_spec = qc_spec
    self.client = FractalClient(address, username='******', password=password, verify=False)

    try:
        self.coll = base_class.from_server(name=coll_name, client=self.client)
    except KeyError:
        if not create:
            raise
        # The lookup failed and the caller allowed creation: make and persist a new one
        self.coll = base_class(name=coll_name, client=self.client)
        self.coll.save()
def live_fractal_or_skip():
    """
    Return a client for a reachable Fractal server, or skip the calling test.

    A staging server at ``localhost:7777`` is tried first. If that connection
    fails, the MolSSI QCArchive endpoint is probed and a default client is returned.
    If that also fails with a connection error, the test is skipped via pytest.
    """
    connection_errors = (requests.exceptions.ConnectionError, ConnectionRefusedError)
    archive_url = "https://api.qcarchive.molssi.org:443"

    try:
        return FractalClient("localhost:7777", verify=False)
    except connection_errors:
        print("Failed to connect to localhost, trying MolSSI QCArchive.")
        try:
            # Cheap reachability probe before building the default client
            requests.get(archive_url, json={}, timeout=5)
            return FractalClient()
        except connection_errors:
            return pytest.skip("Could not make a connection to central Fractal server")
def test_collecting_results():
    """
    Check that the executor collects results for a task from a QCArchive instance
    and that the task is ready for fitting afterwards.
    """
    # default client, i.e. the public database
    client = FractalClient()

    # build a fitting schema for biphenyl
    molecule = Molecule.from_file(file_path=get_data("biphenyl.sdf"), file_format="sdf")
    schema = get_fitting_schema(molecules=molecule)

    # the executor must search the dataset that holds the reference data
    executor = Executor()
    executor._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"

    # hand-written collection request: a single torsiondrive entry under the default spec
    torsion_entry = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    to_collect = dict(
        torsion1d=dict(default=[torsion_entry]),
        optimization={},
        hessian={},
    )

    # let the executor pull the results into the first task
    executor.collect_task_results(task=schema.tasks[0], collection_dict=to_collect, client=client)

    assert schema.tasks[0].ready_for_fitting is True
def test_make_fitting_schema_from_results():
    """
    Check that a fitting schema built from downloaded results has the expected
    number of molecules and tasks, and that every task is ready for fitting.
    """
    # workflow with a ForceBalance optimizer targeting AbInitio_SMIRNOFF
    workflow = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    workflow.set_optimizer(optimizer=optimizer)

    # pull the bace entries from a dataset of small fragments
    client = FractalClient()
    torsion_results = TorsionDriveCollectionResult.from_server(
        client=client,
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries,
    )

    schema = workflow.fitting_schema_from_results(results=torsion_results, combine=True)

    # two results were combined, leaving 2 molecules ...
    assert schema.n_molecules == 2
    # ... that carry 3 torsiondrives between them
    assert schema.n_tasks == 3
    # every task must already hold its reference data
    for fitting_task in schema.tasks:
        assert fitting_task.ready_for_fitting is True
def test_task_from_results():
    """
    Check that a single optimization task can be built from one set of results.
    """
    # pull only the first bace entry from a dataset of small fragments
    client = FractalClient()
    collection_result = TorsionDriveCollectionResult.from_server(
        client=client,
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries[:1],
    )
    # exactly one result was requested, take it
    single_result = list(collection_result.collection.values())[0]

    # workflow with a ForceBalance optimizer targeting AbInitio_SMIRNOFF
    workflow = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    workflow.set_optimizer(optimizer=optimizer)

    # this should be a simple biphenyl molecule
    opt_schema = workflow._task_from_results(results=[single_result], index=1)

    assert opt_schema.initial_forcefield == workflow.initial_forcefield
    assert opt_schema.optimizer_name == optimizer.optimizer_name
    assert opt_schema.job_id == "bespoke_task_1"
    assert bool(opt_schema.target_smirks) is True
    assert opt_schema.target_parameters == workflow.target_parameters
    assert single_result.molecule == opt_schema.target_molecule.molecule
    assert opt_schema.n_tasks == 1
    assert opt_schema.n_targets == 1
    assert opt_schema.ready_for_fitting is True
def managed_compute_server(request, postgres_server):
    """
    Yield a ``(client, server, manager)`` triple backed by a FractalServer with compute.

    The adapter type comes from ``request.param``, so this is parametrized over managers.
    A database named ``test_qcfractal_compute_server`` is created on ``postgres_server``
    and reset before use. After the consumer is done, the manager's adapter is closed
    and the manager is stopped.
    """
    db_name = "test_qcfractal_compute_server"
    postgres_server.create_database(db_name)

    adapter = build_adapter_clients(request.param, storage_name=db_name)

    # The loop lives in an outer-context thread: not all adapters play well
    # with internal loops
    with loop_in_thread() as event_loop:
        server = FractalServer(
            port=find_open_port(),
            storage_project_name=db_name,
            storage_uri=postgres_server.database_uri(),
            loop=event_loop,
            queue_socket=adapter,
            ssl_options=False,
        )

        # Start from a clean, re-initialised database
        reset_server_database(server)

        # Client and manager are imported lazily, right where they are needed
        from qcfractal.interface import FractalClient
        client = FractalClient(server)

        from qcfractal.queue import QueueManager
        manager = QueueManager(client, adapter)

        yield client, server, manager

        # Teardown: close the adapter, then stop the manager
        manager.close_adapter()
        manager.stop()
def build_managed_compute_server(mtype):
    """
    Yield a ``(client, server, manager)`` triple for the adapter type ``mtype``.

    A FractalServer is started on an open port using the ``qcf_compute_server_test``
    storage project, and its database is reset before use. After the consumer is
    done, the manager's adapter is closed and the manager is stopped.
    """
    project_name = "qcf_compute_server_test"
    adapter = build_adapter_clients(mtype, storage_name=project_name)

    # The loop lives in an outer-context thread: not all adapters play well
    # with internal loops
    with loop_in_thread() as event_loop:
        server = FractalServer(
            port=find_open_port(),
            storage_project_name=project_name,
            loop=event_loop,
            queue_socket=adapter,
            ssl_options=False,
        )

        # Start from a clean, re-initialised database
        reset_server_database(server)

        # Client and manager are imported lazily, right where they are needed
        from qcfractal.interface import FractalClient
        client = FractalClient(server)

        from qcfractal.queue import QueueManager
        manager = QueueManager(client, adapter)

        yield client, server, manager

        # Teardown: close the adapter, then stop the manager
        manager.close_adapter()
        manager.stop()
def live_fractal_or_skip():
    """Return a default Fractal client, or skip the test if the central server is unreachable."""
    archive_url = 'https://api.qcarchive.molssi.org:443'
    connection_errors = (requests.exceptions.ConnectionError, ConnectionRefusedError)

    try:
        # Kept inside the try exactly as before; an ImportError here is not caught
        import qcfractal.interface

        # Reachability probe before building the client
        requests.get(archive_url, json={}, timeout=5)
        return FractalClient()
    except connection_errors:
        return pytest.skip("Could not make a connection to central Fractal server")
class QCFractalWrapper:
    """Wrapper over a QCFractal Dataset class

    Handles creating and authenticating with the underlying class method.

    It is a base class for building superclasses that create utility operations
    for adding molecules to the database (e.g., generate XYZ coordinates to allow
    for a 'just add this SMILES'), using a consistent identifier format,
    post-processing routines for accessing data in a database, and passing data
    between different steps of the process."""

    def __init__(self, coll_name: str, qc_spec: str, base_class: Type[Collection] = Dataset,
                 address: str = "localhost:7874", qca_passwd: Optional[str] = None,
                 create: bool = False):
        """Connect to a QCFractal server and open the named collection.

        Args:
            coll_name: Name of the collection holding the data
            qc_spec: Name of the QC specification
            base_class: Type of the collection
            address: Address for the QCFractal server
            qca_passwd: Password for the QCFractal server. When omitted, the value of
                the ``QCAUSR`` environment variable is used (which may also be unset).
            create: Whether creating a new collection is acceptable

        Raises:
            KeyError: Re-raised from ``base_class.from_server`` when the lookup fails
                and ``create`` is false.
        """
        password = qca_passwd if qca_passwd is not None else os.environ.get("QCAUSR")

        self.qc_spec = qc_spec
        self.client = FractalClient(address, username='******', password=password, verify=False)

        try:
            self.coll = base_class.from_server(name=coll_name, client=self.client)
        except KeyError:
            if not create:
                raise
            # The lookup failed and the caller allowed creation: make and persist a new one
            self.coll = base_class(name=coll_name, client=self.client)
            self.coll.save()

    def get_molecules(self, mol_ids: List[int]) -> List[Molecule]:
        """Look up molecules from the server by their IDs.

        The IDs are queried in batches of 1000. The result has one molecule per
        requested ID, in the same order as ``mol_ids``.

        Args:
            mol_ids: List of molecule IDs
        Returns:
            Requested molecules
        Raises:
            KeyError: If a requested ID is absent from the query results.
        """
        batch_size = 1000
        fetched: List[Molecule] = []
        for start in range(0, len(mol_ids), batch_size):
            batch = mol_ids[start:start + batch_size]
            fetched.extend(self.client.query_molecules(batch))

        # Re-order the query results to match the order of the request
        by_id = {mol.id: mol for mol in fetched}
        return [by_id[mol_id] for mol_id in mol_ids]
def test_submit_new_tasks(fractal_compute_server):
    """
    Check that new tasks generated for a schema are submitted to the archive instance.
    """
    # client for the test server; nothing will actually run as psi4 is not installed
    client = FractalClient(fractal_compute_server)

    # build a fitting schema for biphenyl
    molecule = Molecule.from_file(file_path=get_data("biphenyl.sdf"), file_format="sdf")
    schema = get_fitting_schema(molecules=molecule)

    executor = Executor()

    # submit the first task and check the server's summary of what was added
    first_task = schema.tasks[0]
    response = executor.submit_new_tasks(task=first_task, client=client)

    expected = {'OpenFF Bespoke-fit': {'default': 1}}
    assert response == expected
def restart_archive_record(
    self,
    task: Union[ResultRecord, OptimizationRecord, TorsionDriveRecord],
    client: FractalClient,
) -> None:
    """
    Take a record and dispatch the type of restart to be done.

    * ``ResultRecord``: restarted through ``restart_basic``.
    * ``OptimizationRecord``: restarted through ``restart_optimizations``.
    * anything else is treated as a torsiondrive: its optimizations whose status is
      ``"ERROR"`` are restarted first, then the torsiondrive itself.

    The record type is checked with ``isinstance`` so that subclasses of the result
    and optimization records take the matching branch instead of falling through to
    the torsiondrive handling.

    Args:
        task: The archive record that should be restarted.
        client: Client used to query and modify records on the server.
    """
    if isinstance(task, ResultRecord):
        print("restarting basic ...")
        self.restart_basic([task.id], client=client)
    elif isinstance(task, OptimizationRecord):
        print("restarting optimizations ...")
        self.restart_optimizations([task.id], client=client)
    else:
        print("restarting torsiondrives and optimizations ...")
        # we need the optimization ids first
        td_opts = []
        for optimizations in task.optimization_history.values():
            td_opts.extend(optimizations)
        # now query the optimizations and keep only the ones that errored
        opt_records = client.query_procedures(td_opts)
        restart_opts = [opt.id for opt in opt_records if opt.status == "ERROR"]
        # restart opts then torsiondrives
        self.restart_optimizations(restart_opts, client=client)
        self.restart_torsiondrives([task.id], client=client)
def test_sort_results(combine):
    """
    Check how many groups the workflow sorts the results into before a fitting
    schema is made, with and without combination.
    """
    # pull the bace entries from a dataset of fragments
    client = FractalClient()
    torsion_results = TorsionDriveCollectionResult.from_server(
        client=client,
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries,
    )

    workflow = WorkflowFactory()
    sorted_results = workflow._sort_results(results=torsion_results, combine=combine)

    # combining gives 2 groups, otherwise there are 3
    expected_count = 2 if combine else 3
    assert len(sorted_results) == expected_count
def test_error_cycle_complete():
    """
    Error cycle a task which is already complete in qcarchive. The task result
    should be collected and the task put into the optimization queue.
    """
    client = FractalClient()
    molecule = Molecule.from_file(get_data("biphenyl.sdf"))
    schema = get_fitting_schema(molecule)

    executor = Executor()
    # fake the dataset name
    executor._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"

    task = schema.tasks[0]
    task_hashes = list(task.get_task_map().keys())
    # fake the task map: point the first task hash at a known dataset entry
    entry_name = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    executor.task_map = {task_hashes[0]: entry_name}

    executor._error_cycle_task(task=task, client=client)

    # the result should be collected and the task is now in the opt queue
    queued_task = executor.opt_queue.get(timeout=5)
    assert queued_task.ready_for_fitting is True
def get_optimization_specification(client: FractalClient, name: str) -> dict:
    """Build a ready-to-use copy of one of the hard-coded optimization specifications.

    The stored specification is deep-copied, so the module-level table is never
    modified. If its ``qc_spec`` has keywords, they are registered with the
    QCFractal server and replaced by the ID the server returns.

    Args:
        client: QCFractal client
        name: Name of the specification
    Returns:
        Dictionary ready to pass to QCFractal
    Raises:
        KeyError: If ``name`` is not one of the known specifications.
    """
    spec = deepcopy(_opt_specs[name])
    qc_spec = spec['qc_spec']

    keywords = qc_spec.get('keywords')
    if keywords is None:
        # Nothing to register, the specification is usable as it is
        return spec

    # Swap the keyword values for the ID assigned by the server
    qc_spec['keywords'] = client.add_keywords([keywords])[0]
    return spec
def _error_cycle_task(self, task: OptimizationSchema, client: FractalClient) -> None:
    """
    Specific error cycling for a given task.

    For every entry of every target the matching archive record is pulled and, based
    on its status:

    * ``COMPLETE``: the entry is scheduled for collection.
    * ``ERROR``: the error is stored on the task and the retry counter for the entry
      is increased. When the counter reaches ``self.max_retires`` the task is marked
      ``Status.CollectionError``; otherwise the record is restarted and the task is
      marked ``Status.ErrorCycle``.
    * anything else: the entry is left to run.

    Afterwards completed results are collected, new reference tasks are submitted,
    and the task is put on the optimization queue (ready for fitting, or failed
    collection) or back on the collection queue.

    Once one entry has exhausted its retries in this pass, a later errored entry no
    longer resets the task to ``Status.ErrorCycle``. Without this the failure could
    be overwritten, the task re-queued, and the retry limit never matched again.

    Args:
        task: The optimization task whose reference data is being gathered.
        client: Client used to talk to the archive.
    """
    print("task molecule name ", task.job_id)

    # keep track of any records that should be collected
    to_collect = {"torsion1d": {}, "optimization": {}, "hessian": {}}
    # set once any entry has used up its retries during this pass
    retries_exhausted = False

    # loop through each target and look for tasks to update
    for target in task.targets:
        # get the dataset
        dataset = client.get_collection(
            collection_type=self._dataset_type_mapping[target.collection_workflow],
            name=self._dataset_name,
        )
        # now update each entry
        for entry in target.tasks:
            # query the archive for the record behind this entry and its status
            task_hash = entry.get_task_hash()
            entry_id = self.task_map[task_hash]
            print("pulling record for ", entry_id)
            record = self.get_record(dataset=dataset, spec=target.qc_spec, record_name=entry_id)
            record_status = record.status.value

            if record_status == "COMPLETE":
                collection_set = to_collect[target.collection_workflow]
                collection_set.setdefault(target.qc_spec.spec_name, []).append(entry_id)

            elif record_status == "ERROR":
                # save the error into the task
                task.error_message = record.get_error()
                print(
                    f"The task {task.job_id} has errored with attempting restart. Error message:",
                    task.error_message,
                )
                # update the restart count
                self.retries[task_hash] = self.retries.get(task_hash, 0) + 1
                if self.retries[task_hash] == self.max_retires:
                    # mark as errored
                    task.status = Status.CollectionError
                    retries_exhausted = True
                else:
                    # restart the job
                    self.restart_archive_record(task=record, client=client)
                    # never downgrade a collection failure recorded earlier in this pass
                    if not retries_exhausted:
                        task.status = Status.ErrorCycle

            # any other status: the task is incomplete, let it run

    # if we have values to collect update the task here
    if any(to_collect.values()):
        print("collecting results for ", to_collect)
        self.collect_task_results(task, to_collect, client)

    # now we should look for new tasks to submit
    print("looking for new reference tasks ...")
    if task.get_task_map():
        response = self.submit_new_tasks(task, client=client)
        print("response of new tasks ... \n", response)

    print("checking for optimizations to run ...")
    if task.ready_for_fitting or task.status == Status.CollectionError:
        # either the molecule is done, or one of the collection entries has failed;
        # both go to the opt queue (the failed task will fail there)
        self.opt_queue.put(task)
    else:
        print("task not finished putting back into the queue.")
        # the molecule is not finished and not ready for opt, error cycle again
        self.collection_queue.put(task)