def _update_state(
    self, td_state: Dict[str, Any], result_data: List[GridPointResult]
) -> Dict[str, Any]:
    """
    Feed one round of optimisation results into the torsiondrive state and
    write the state to disk.

    Note:
        Each geometry is converted with ``np.array`` before being handed to
        the torsiondrive api, which wants coordinates as arrays.

    Args:
        td_state: The current torsiondrive state object.
        result_data: The results from the current round of optimisations.

    Returns:
        The same ``td_state`` object after ``td_api.update_state`` has been
        called on it.
    """
    # Group (start geometry, end geometry, energy) tuples by the string form
    # of the dihedral angle they belong to.
    grouped = {}
    for point in result_data:
        grid_key = str(point.dihedral_angle)
        bucket = grouped.setdefault(grid_key, [])
        start_geometry = np.array(point.input_geometry)
        end_geometry = np.array(point.final_geometry)
        bucket.append((start_geometry, end_geometry, point.final_energy))

    # Push the grouped results into the state.
    td_api.update_state(td_state=td_state, job_results=grouped)

    # Dump the updated state to a JSON file.
    td_api.current_state_json_dump(td_state, "torsiondrive_state.json")
    return td_state
def update_state(self, output_queue):
    """Drain the output queue and apply its contents to the tdrive state.

    Each queue item is indexed: element 0 (stringified) is the key the result
    is filed under, and the remaining elements are unpacked as the arguments
    of a single ``list.append`` call.
    NOTE(review): that unpacking only works when exactly one element follows
    the key -- confirm against the producer of ``output_queue``.
    """
    collected = {}
    while not output_queue.empty():
        item = output_queue.get_nowait()
        grid_key = str(item[0])
        bucket = collected.setdefault(grid_key, [])
        bucket.append(*item[1:])

    td_api.update_state(self.td_state, collected)
def run_torsiondrive_scan(self):
    """
    Drive a torsiondrive scan to completion, running every job serially.

    The procedure is:
    1. Build the initial torsiondrive state from this object's settings.
    2. Ask td_api.next_jobs_from_state() for the next set of jobs.
    3. When no jobs are returned, print a completion message and return the
       result of td_api.collect_lowest_energies().
    4. Otherwise run each job through geometric.run_json.geometric_run_json.
    5. Hand the collected results to td_api.update_state().
    6. Repeat from step 2.
    """
    # step 1
    td_state = td_api.create_initial_state(
        dihedrals=self.dihedrals,
        grid_spacing=self.grid_spacing,
        elements=self.elements,
        init_coords=self.init_coords,
        dihedral_ranges=self.dihedral_ranges,
        energy_decrease_thresh=self.energy_decrease_thresh,
        energy_upper_limit=self.energy_upper_limit,
    )

    # step 2 (first pass)
    pending = td_api.next_jobs_from_state(td_state, verbose=True)
    while len(pending) != 0:
        # step 4
        job_results = collections.defaultdict(list)
        for grid_id_str, start_geometries in pending.items():
            for start_geo in start_geometries:
                dihedral_values = td_api.grid_id_from_string(grid_id_str)

                # Run geomeTRIC on this starting geometry
                run_input = self.make_geomeTRIC_input(dihedral_values, start_geo)
                run_output = geometric.run_json.geometric_run_json(run_input)

                # Pull the relevant data out of the last trajectory frame
                last_frame = run_output['trajectory'][-1]
                end_geo = last_frame['molecule']['geometry']
                end_energy = last_frame['properties']['return_energy']

                # Results must be appended in the same order as the inputs;
                # the serial loop guarantees that here.
                job_results[grid_id_str].append((start_geo, end_geo, end_energy))

        # step 5
        td_api.update_state(td_state, job_results)

        # step 6 -> back to step 2
        pending = td_api.next_jobs_from_state(td_state, verbose=True)

    # step 3
    print("torsiondrive Scan Finished")
    return td_api.collect_lowest_energies(td_state)
def iterate(self):
    """Advance the torsiondrive service by one step.

    Returns ``False`` while tasks are still running or after new
    optimization tasks have been submitted, and ``True`` once torsiondrive
    reports no further jobs (status is then set to "COMPLETE" and
    ``update_output`` is called).
    """
    _check_td()
    from torsiondrive import td_api

    self.status = "RUNNING"

    # Nothing to do until every outstanding task has finished
    if self.task_manager.done() is False:
        return False

    finished = self.task_manager.get_tasks()

    # Build the per-key result lists expected by td_api.update_state
    task_results = {}
    for grid_key, ids in self.task_map.items():
        entries = []
        for tid in ids:
            record = finished[tid]

            # Fetch both molecules and look their geometries up by id
            start_id = record["initial_molecule"]
            end_id = record["final_molecule"]
            molecules = self.storage_socket.get_molecules(id=[start_id, end_id])["data"]
            geometry_by_id = {mol.id: mol.geometry for mol in molecules}

            entries.append((geometry_by_id[start_id], geometry_by_id[end_id], record["energies"][-1]))
        task_results[grid_key] = entries

    # torsiondrive reports through print(); redirect stdout into a buffer so
    # the text can be appended to self.stdout.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        td_api.update_state(self.torsiondrive_state, task_results)

        # Ask for the next batch of jobs given the updated state
        next_tasks = td_api.next_jobs_from_state(self.torsiondrive_state, verbose=True)

    self.stdout += "\n" + captured.getvalue()

    # More work to do: submit it and report "not finished"
    if len(next_tasks) != 0:
        self.submit_optimization_tasks(next_tasks)
        return False

    # All done
    self.status = "COMPLETE"
    self.update_output()
    return True
def iterate(self):
    """Advance the torsiondrive service by one step.

    Returns ``False`` while tasks are still running or after new
    optimization tasks have been submitted; when torsiondrive reports no
    further jobs, returns whatever ``self.finalize()`` returns.

    Raises:
        KeyError: if a completed task is missing from the task manager's
            results, or a requested molecule id is absent from the storage
            query result.
    """
    _check_td()
    from torsiondrive import td_api

    self.status = "RUNNING"

    # Check if tasks are done
    if self.task_manager.done() is False:
        return False

    complete_tasks = self.task_manager.get_tasks()

    # Populate task results
    task_results = {}
    for key, task_ids in self.task_map.items():
        task_results[key] = []

        # Check for history key
        if key not in self.optimization_history:
            self.optimization_history[key] = []

        for task_id in task_ids:
            # Cycle through all tasks for this entry
            ret = complete_tasks[task_id]

            # Look the geometries up by molecule id rather than by position:
            # positional indexing breaks if the store returns the records in
            # a different order than requested, or returns a single record
            # when the initial and final ids are the same.
            initial_id = ret["initial_molecule"]
            final_id = ret["final_molecule"]
            mol_data = self.storage_socket.get_molecules(id=[initial_id, final_id])["data"]
            mol_map = {mol.id: mol.geometry for mol in mol_data}

            task_results[key].append((mol_map[initial_id], mol_map[final_id], ret["energies"][-1]))

            # Update history
            self.optimization_history[key].append(ret["id"])

    td_api.update_state(self.torsiondrive_state, task_results)

    # Create new tasks from the current state
    next_tasks = td_api.next_jobs_from_state(self.torsiondrive_state, verbose=True)

    # All done
    if len(next_tasks) == 0:
        return self.finalize()

    self.submit_optimization_tasks(next_tasks)

    return False
def iterate(self):
    """Advance the torsiondrive service by one step.

    When ``self.data["remaining_tasks"]`` is anything other than ``False``
    the required tasks are queried and, once all have completed without
    error, their results are folded into the torsiondrive state. The next
    jobs are then requested from the state.

    Returns:
        ``False`` if tasks are still outstanding or new tasks were
        submitted; ``self.data`` (after ``self.finalize()``) when
        torsiondrive has no further jobs.

    Raises:
        KeyError: if any queried task has status "ERROR".
    """
    state = self.data
    state["status"] = "RUNNING"

    # NOTE(review): a False sentinel presumably marks the first iteration,
    # when there is nothing to collect yet -- confirm against the caller.
    if state["remaining_tasks"] is not False:

        # Fetch the required tasks that have finished (either way), keeping
        # only their status and the reference to their result
        task_query = self.storage_socket.get_queue(
            {
                "id": state["required_tasks"],
                "status": ["COMPLETE", "ERROR"]
            },
            projection={"base_result": True, "status": True})
        finished = task_query["data"]

        # Some tasks have not finished yet
        if len(finished) != len(state["required_tasks"]):
            return False

        if any(task["status"] == "ERROR" for task in finished):
            raise KeyError("All tasks did not execute successfully.")

        # Map each task id to the procedure record holding its result
        procedure_by_task = {}
        for task in finished:
            query = {"id": task["base_result"]["_ref"].id}
            procedure_by_task[task["id"]] = self.storage_socket.get_procedures(query)["data"][0]

        # Build the per-key result lists and record the optimization history
        task_results = {}
        for grid_key, ids in state["task_map"].items():
            entries = task_results[grid_key] = []
            history = state["optimization_history"].setdefault(grid_key, [])

            for tid in ids:
                record = procedure_by_task[tid]

                # Fetch the initial and final molecules of this optimization
                molecules = self.storage_socket.get_molecules(
                    [record["initial_molecule"], record["final_molecule"]], index="id")["data"]

                entries.append((molecules[0]["geometry"], molecules[1]["geometry"], record["energies"][-1]))
                history.append(record["id"])

        td_api.update_state(state["torsiondrive_state"], task_results)

    # Ask torsiondrive which jobs are needed given the current state
    next_tasks = td_api.next_jobs_from_state(state["torsiondrive_state"], verbose=True)

    # More work to do: submit it and report "not finished"
    if len(next_tasks) != 0:
        self.submit_optimization_tasks(next_tasks)
        return False

    # All done
    self.finalize()
    return self.data
def iterate(self):
    """Advance the torsiondrive service by one step.

    Returns ``False`` immediately while ``self.data["remaining_jobs"]`` is
    positive. When it is exactly zero (and not the ``False`` used on the
    first iteration) the completed jobs are collected and folded into the
    torsiondrive state. The next jobs are then requested from the state.

    Returns:
        ``False`` if jobs are still outstanding or new jobs were submitted;
        ``self.data`` (after ``self.finalize()``) when torsiondrive has no
        further jobs.
    """
    state = self.data
    state["status"] = "RUNNING"

    remaining = state["remaining_jobs"]

    # Jobs are still running
    if remaining > 0:
        return False

    # remaining_jobs is False on the first iteration; False == 0 in Python,
    # so the identity check is needed to tell the two apart.
    if remaining == 0 and remaining is not False:

        # Query the finished jobs
        job_query = self.storage_socket.get_procedures(list(state["complete_jobs"].values()), by_id=True)

        # Pre-size one slot list per key so results can be placed by position
        job_results = {}
        job_ids = {}
        for grid_key, count in state["update_structure"].items():
            job_results[grid_key] = [None] * count
            job_ids[grid_key] = [None] * count

        # Reverse map from the values of complete_jobs back to their keys
        uid_by_result = {result_id: uid for uid, result_id in state["complete_jobs"].items()}

        for record in job_query["data"]:
            uid = uid_by_result[record["id"]]
            grid_key, position = state["job_map"][uid]
            slot = int(position)

            molecules = self.storage_socket.get_molecules(
                [record["initial_molecule"], record["final_molecule"]], index="id")["data"]

            job_results[grid_key][slot] = (molecules[0]["geometry"], molecules[1]["geometry"], record["energies"][-1])
            job_ids[grid_key][slot] = record["id"]

        # Extend the optimization history, in slot order
        history = state["optimization_history"]
        for grid_key, ids in job_ids.items():
            if grid_key not in history:
                history[grid_key] = []
            history[grid_key].extend(ids)

        td_api.update_state(state["torsiondrive_state"], job_results)

    # Ask torsiondrive which jobs are needed given the current state
    next_jobs = td_api.next_jobs_from_state(state["torsiondrive_state"], verbose=True)

    # More work to do: submit it and report "not finished"
    if len(next_jobs) != 0:
        self.submit_optimization_tasks(next_jobs)
        return False

    # All done
    self.finalize()
    return self.data