def _checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
    """
    (internal method) Check out a FireWork so that the caller can run it.

    A FireWork is obtained from ``_get_a_fw_to_run``; a RUNNING Launch is
    written to the launches collection for it, the FireWork itself is set to
    RUNNING and stored, and both are handed back. Actually running the
    FireWork is the caller's job.

    :param fworker: A FWorker instance
    :param launch_dir: the dir the FW will be run in (for creating a Launch object)
    :param fw_id: forwarded unchanged to ``_get_a_fw_to_run``
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    firework, prev_launch_id = self._get_a_fw_to_run(fworker, fw_id)
    if not firework:
        return None, None

    # keep the launch id that came back with the FireWork, else allocate one
    launch_id = prev_launch_id or self.get_new_launch_id()
    launch = Launch('RUNNING', launch_dir, fworker, host, ip,
                    launch_id=launch_id, fw_id=firework.fw_id)
    self.launches.update({'launch_id': launch_id}, launch.to_db_dict(),
                         upsert=True)
    self.m_logger.debug(
        'Created/updated Launch with launch_id: {}'.format(launch_id))

    if prev_launch_id:
        # an existing launch is being updated: swap it for the new object
        refreshed = []
        for existing in firework.launches:
            if existing.launch_id == launch.launch_id:
                refreshed.append(launch)
            else:
                refreshed.append(existing)
        firework.launches = refreshed
    else:
        # no previous launch: this one is new for the FireWork
        firework.launches.append(launch)

    firework.state = 'RUNNING'
    self._upsert_fws([firework])
    self.m_logger.debug('Checked out FW with id: {}'.format(firework.fw_id))

    return firework, launch_id
def checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
    """
    Check out a FireWork so that the caller can run it.

    A FireWork is obtained from ``_get_a_fw_to_run``. If it already carries a
    RESERVED Launch, that Launch is overwritten by a RUNNING one (keeping its
    launch id and state history); otherwise a new Launch is created. The
    FireWork, and every other FireWork that references the same launch id and
    is still WAITING/READY/RESERVED/FIZZLED, is then set to RUNNING. Running
    the FireWork is the caller's job.

    :param fworker: A FWorker instance
    :param launch_dir: the dir the FW will be run in (for creating a Launch object)
    :param fw_id: forwarded unchanged to ``_get_a_fw_to_run``
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    # TODO: this method is confusing, says AJ of Xmas past. Clean it up, remove duplication, etc.
    firework = self._get_a_fw_to_run(fworker.query, fw_id)
    if not firework:
        return None, None

    # Reuse a prior reservation when there is one: adding a second Launch
    # would be problematic from a duplicate-run standpoint.
    reservations = [l for l in firework.launches if l.state == 'RESERVED']
    reserved_launch = reservations[0] if reservations else None
    if reserved_launch:
        state_history = reserved_launch.state_history
        launch_id = reserved_launch.launch_id
    else:
        state_history = None
        launch_id = self.get_new_launch_id()

    trackers = None
    if '_trackers' in firework.spec:
        trackers = [Tracker.from_dict(t) for t in firework.spec['_trackers']]

    launch = Launch('RUNNING', launch_dir, fworker, host, ip,
                    trackers=trackers, state_history=state_history,
                    launch_id=launch_id, fw_id=firework.fw_id)
    self.launches.find_and_modify({'launch_id': launch.launch_id},
                                  launch.to_db_dict(), upsert=True)
    self.m_logger.debug(
        'Created/updated Launch with launch_id: {}'.format(launch_id))

    if reserved_launch:
        # overwrite the reservation in the FireWork's launch list
        firework.launches = [
            launch if existing.launch_id == launch.launch_id else existing
            for existing in firework.launches
        ]
    else:
        # nothing was reserved: the launch is new for this FireWork
        firework.launches.append(launch)

    firework.state = 'RUNNING'
    self._upsert_fws([firework])

    # update any duplicated runs that share this launch
    duplicate_query = {
        'launches': launch_id,
        'state': {'$in': ['WAITING', 'READY', 'RESERVED', 'FIZZLED']},
    }
    for duplicate_doc in self.fireworks.find(duplicate_query, {'fw_id': 1}):
        duplicate = self.get_fw_by_id(duplicate_doc['fw_id'])
        duplicate.state = 'RUNNING'
        self._upsert_fws([duplicate])

    self.m_logger.debug('Checked out FW with id: {}'.format(firework.fw_id))

    return firework, launch_id
def checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
    """
    Find a FireWork that can be run, mark it as RUNNING and return it.

    The Launch written for the FireWork replaces an earlier RESERVED Launch
    when one exists (same launch id, same state history); otherwise it gets a
    newly allocated launch id. FireWorks that point at the same launch id and
    are in a WAITING, READY, RESERVED or FIZZLED state are switched to RUNNING
    as well. The caller is responsible for running the returned FireWork.

    :param fworker: A FWorker instance
    :param launch_dir: the dir the FW will be run in (for creating a Launch object)
    :param fw_id: forwarded unchanged to ``_get_a_fw_to_run``
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    # TODO: this method is confusing, says AJ of Xmas past. Clean it up, remove duplication, etc.
    m_fw = self._get_a_fw_to_run(fworker.query, fw_id)
    if not m_fw:
        return None, None

    # Was this FireWork previously reserved? If so that reservation is
    # overwritten, since a second Launch would look like a duplicate run.
    reserved = [launch for launch in m_fw.launches
                if launch.state == 'RESERVED']
    previous = reserved[0] if reserved else None
    if previous:
        history = previous.state_history
        l_id = previous.launch_id
    else:
        history = None
        l_id = self.get_new_launch_id()

    if '_trackers' in m_fw.spec:
        trackers = [Tracker.from_dict(spec) for spec in m_fw.spec['_trackers']]
    else:
        trackers = None

    running = Launch('RUNNING', launch_dir, fworker, host, ip,
                     trackers=trackers, state_history=history,
                     launch_id=l_id, fw_id=m_fw.fw_id)
    self.launches.find_and_modify({'launch_id': running.launch_id},
                                  running.to_db_dict(), upsert=True)
    self.m_logger.debug(
        'Created/updated Launch with launch_id: {}'.format(l_id))

    if not previous:
        # first launch recorded for this checkout
        m_fw.launches.append(running)
    else:
        # replace the reserved launch with the running one
        swapped = []
        for launch in m_fw.launches:
            same = launch.launch_id == running.launch_id
            swapped.append(running if same else launch)
        m_fw.launches = swapped

    m_fw.state = 'RUNNING'
    self._upsert_fws([m_fw])

    # update any duplicated runs
    stale_states = ['WAITING', 'READY', 'RESERVED', 'FIZZLED']
    cursor = self.fireworks.find(
        {'launches': l_id, 'state': {'$in': stale_states}}, {'fw_id': 1})
    for doc in cursor:
        twin = self.get_fw_by_id(doc['fw_id'])
        twin.state = 'RUNNING'
        self._upsert_fws([twin])

    self.m_logger.debug('Checked out FW with id: {}'.format(m_fw.fw_id))

    return m_fw, l_id
def _checkout_fw(self, fworker, launch_dir, fw_id=None, host=None, ip=None):
    """
    (internal method) Find a FireWork to run, mark it RUNNING and return it.

    The Launch stored for the FireWork reuses the launch id returned by
    ``_get_a_fw_to_run`` when there is one, and gets a new id otherwise. The
    caller is responsible for running the returned FireWork.

    :param fworker: A FWorker instance
    :param launch_dir: the dir the FW will be run in (for creating a Launch object)
    :param fw_id: forwarded unchanged to ``_get_a_fw_to_run``
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    m_fw, prev_launch_id = self._get_a_fw_to_run(fworker, fw_id)
    if not m_fw:
        return None, None

    # create or update a launch
    if prev_launch_id:
        l_id = prev_launch_id
    else:
        l_id = self.get_new_launch_id()
    running = Launch('RUNNING', launch_dir, fworker, host, ip,
                     launch_id=l_id, fw_id=m_fw.fw_id)
    self.launches.update({'launch_id': l_id}, running.to_db_dict(),
                         upsert=True)
    self.m_logger.debug(
        'Created/updated Launch with launch_id: {}'.format(l_id))

    # attach the launch to the FireWork
    if prev_launch_id:
        # the launch already exists on the FireWork: replace it
        m_fw.launches = [
            running if known.launch_id == running.launch_id else known
            for known in m_fw.launches
        ]
    else:
        # the launch is new for the FireWork
        m_fw.launches.append(running)

    m_fw.state = 'RUNNING'
    self._upsert_fws([m_fw])
    self.m_logger.debug('Checked out FW with id: {}'.format(m_fw.fw_id))

    return m_fw, l_id
def _reserve_fw(self, fworker, launch_dir, host=None, ip=None):
    """
    Reserve a FireWork by attaching a new RESERVED Launch to it.

    :param fworker: A FWorker instance
    :param launch_dir: the dir recorded on the Launch object
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    # the second value returned alongside the FireWork is not used here
    firework, _ = self._get_a_fw_to_run(fworker)
    if not firework:
        return None, None

    # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
    new_launch_id = self.get_new_launch_id()
    reservation = Launch('RESERVED', launch_dir, fworker, host, ip,
                         launch_id=new_launch_id, fw_id=firework.fw_id)
    self.launches.insert(reservation.to_db_dict())

    # record the reservation on the FireWork and store it
    firework.launches.append(reservation)
    firework.state = 'RESERVED'
    self._upsert_fws([firework])
    self.m_logger.debug('Reserved FW with id: {}'.format(firework.fw_id))

    return firework, new_launch_id
def reserve_fw(self, fworker, launch_dir, host=None, ip=None):
    """
    Reserve a FireWork by attaching a new RESERVED Launch to it.

    Trackers listed under the ``_trackers`` key of the FireWork spec, if any,
    are passed on to the Launch.

    :param fworker: A FWorker instance (its ``query`` selects the FireWork)
    :param launch_dir: the dir recorded on the Launch object
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    firework = self._get_a_fw_to_run(fworker.query)
    if not firework:
        return None, None

    # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
    new_launch_id = self.get_new_launch_id()

    trackers = None
    if '_trackers' in firework.spec:
        trackers = [Tracker.from_dict(t) for t in firework.spec['_trackers']]

    reservation = Launch('RESERVED', launch_dir, fworker, host, ip,
                         trackers=trackers, launch_id=new_launch_id,
                         fw_id=firework.fw_id)
    self.launches.find_and_modify({'launch_id': reservation.launch_id},
                                  reservation.to_db_dict(), upsert=True)

    # record the reservation on the FireWork and store it
    firework.launches.append(reservation)
    firework.state = 'RESERVED'
    self._upsert_fws([firework])
    self.m_logger.debug('Reserved FW with id: {}'.format(firework.fw_id))

    return firework, new_launch_id
def _reserve_fw(self, fworker, launch_dir, host=None, ip=None):
    """
    Obtain a FireWork and put it in the RESERVED state with a new Launch.

    The Launch receives the trackers found under ``_trackers`` in the
    FireWork spec when that key is present, and None otherwise.

    :param fworker: A FWorker instance (its ``query`` selects the FireWork)
    :param launch_dir: the dir recorded on the Launch object
    :param host: the host making the request (for creating a Launch object)
    :param ip: the ip making the request (for creating a Launch object)
    :return: a FireWork, launch_id tuple; (None, None) when no FireWork was obtained
    """
    m_fw = self._get_a_fw_to_run(fworker.query)
    if not m_fw:
        return None, None

    # TODO: this code is duplicated with checkout_fw with minimal mods, should refactor this!!
    launch_id = self.get_new_launch_id()

    if '_trackers' in m_fw.spec:
        trackers = [Tracker.from_dict(spec) for spec in m_fw.spec['_trackers']]
    else:
        trackers = None

    reserved = Launch('RESERVED', launch_dir, fworker, host, ip,
                      trackers=trackers, launch_id=launch_id,
                      fw_id=m_fw.fw_id)
    launch_filter = {'launch_id': reserved.launch_id}
    self.launches.find_and_modify(launch_filter, reserved.to_db_dict(),
                                  upsert=True)

    # add the launch to the FireWork
    m_fw.launches.append(reserved)
    m_fw.state = 'RESERVED'
    self._upsert_fws([m_fw])
    self.m_logger.debug('Reserved FW with id: {}'.format(m_fw.fw_id))

    return m_fw, launch_id
def get_launch_by_id(self, launch_id):
    """
    Look up a Launch by its id.

    :param launch_id: launch id
    :return: Launch object built from the stored document
    :raises ValueError: if the launches collection has no document with this launch_id
    """
    launch_doc = self.launches.find_one({'launch_id': launch_id})
    if not launch_doc:
        raise ValueError(
            'No Launch exists with launch_id: {}'.format(launch_id))
    return Launch.from_dict(launch_doc)
def task_dict_to_wf(task_dict, launchpad):
    """
    Add a legacy task to the LaunchPad as an already-COMPLETED workflow.

    A single FireWork holding a DummyLegacyTask is built from the task
    document, given one COMPLETED Launch, wrapped in a Workflow and added to
    the LaunchPad.

    :param task_dict: task document; the keys read are 'task_type', 'run_tags',
        'snl', 'snlgroup_id', 'dir_name', 'dir_name_full', 'completed_at' and
        'pretty_formula'
    :param launchpad: LaunchPad that supplies the new ids and stores the workflow
    :return: the fw_id assigned to the new FireWork
    """
    fw_id = launchpad.get_new_fw_id()
    l_id = launchpad.get_new_launch_id()

    spec = {'task_type': task_dict['task_type'],
            'run_tags': task_dict['run_tags'],
            'vaspinputset_name': None,
            'vasp': None,
            'mpsnl': task_dict['snl'],
            'snlgroup_id': task_dict['snlgroup_id']}
    tasks = [DummyLegacyTask()]

    launch_dir = task_dict['dir_name_full']

    stored_data = {'error_list': []}
    update_spec = {'prev_vasp_dir': task_dict['dir_name'],
                   'prev_task_type': spec['task_type'],
                   'mpsnl': spec['mpsnl'],
                   'snlgroup_id': spec['snlgroup_id'],
                   'run_tags': spec['run_tags']}
    fwaction = FWAction(stored_data=stored_data, update_spec=update_spec)

    # only tasks with a completion timestamp get a state-history entry
    if task_dict['completed_at']:
        complete_date = datetime.datetime.strptime(
            task_dict['completed_at'], "%Y-%m-%d %H:%M:%S")
        state_history = [{"created_on": complete_date, 'state': 'COMPLETED'}]
    else:
        state_history = []

    launches = [Launch('COMPLETED', launch_dir, fworker=None, host=None,
                       ip=None, action=fwaction, state_history=state_history,
                       launch_id=l_id, fw_id=fw_id)]

    f = Composition.from_formula(task_dict['pretty_formula']).alphabetical_formula

    fw = FireWork(tasks, spec, name=get_slug(f + '--' + spec['task_type']),
                  launches=launches, state='COMPLETED', created_on=None,
                  fw_id=fw_id)

    wf_meta = get_meta_from_structure(Structure.from_dict(task_dict['snl']))
    wf_meta['run_version'] = 'preproduction (0)'

    wf = Workflow.from_FireWork(fw, name=f, metadata=wf_meta)

    launchpad.add_wf(wf, reassign_all=False)
    launchpad._upsert_launch(launches[0])

    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('ADDED {}'.format(fw_id))
    return fw_id
def recover_offline(self, launch_id, ignore_errors=False):
    """
    Bring the database record of an offline Launch up to date from the files
    in its launch directory.

    ``FW_ping.json`` (when present) supplies a ping time that is passed to
    ``ping_launch``. ``FW_offline.json`` (required) may contain 'started_on',
    which moves the Launch to RUNNING, and 'fwaction', which completes the
    Launch through ``complete_launch`` and flags the offline run as
    completed. The 'updated_on' field of the offline run is refreshed last.

    :param launch_id: id of the Launch to recover
    :param ignore_errors: if True, do not print a traceback when recovery fails
    :return: None when recovery succeeded, otherwise the fw_id of the Launch
        whose recovery raised an error
    """
    time_format = "%Y-%m-%dT%H:%M:%S.%f"

    # Looked up outside the try block: a failure here propagates to the caller.
    m_launch = self.get_launch_by_id(launch_id)
    try:
        self.m_logger.debug("RECOVERING fw_id: {}".format(m_launch.fw_id))

        # look for ping file - update the FireWork if this is the case
        ping_loc = os.path.join(m_launch.launch_dir, "FW_ping.json")
        if os.path.exists(ping_loc):
            with open(ping_loc) as f:
                ping_data = json.load(f)
            ping_time = datetime.datetime.strptime(ping_data['ping_time'],
                                                   time_format)
            self.ping_launch(launch_id, ping_time)

        # look for action in FW_offline.json; the file is closed again
        # before any database work starts
        offline_loc = os.path.join(m_launch.launch_dir, "FW_offline.json")
        with open(offline_loc) as f:
            offline_data = json.load(f)

        if 'started_on' in offline_data:
            m_launch.state = 'RUNNING'
            started_on = datetime.datetime.strptime(
                offline_data['started_on'], time_format)
            # overwrite the RUNNING timestamp(s) with the time recorded offline
            for s in m_launch.state_history:
                if s['state'] == 'RUNNING':
                    s['created_on'] = started_on
            self.launches.find_and_modify({'launch_id': m_launch.launch_id},
                                          m_launch.to_db_dict(), upsert=True)

        if 'fwaction' in offline_data:
            fwaction = FWAction.from_dict(offline_data['fwaction'])
            state = offline_data['state']
            m_launch = Launch.from_dict(
                self.complete_launch(launch_id, fwaction, state))
            # overwrite the final-state timestamp(s) with the offline time
            for s in m_launch.state_history:
                if s['state'] == state:
                    s['created_on'] = datetime.datetime.strptime(
                        offline_data['completed_on'], time_format)
            self.launches.find_and_modify({'launch_id': m_launch.launch_id},
                                          m_launch.to_db_dict(), upsert=True)
            self.offline_runs.update({"launch_id": launch_id},
                                     {"$set": {"completed": True}})

        # update the updated_on
        self.offline_runs.update(
            {"launch_id": launch_id},
            {"$set": {"updated_on": datetime.datetime.utcnow().isoformat()}})
        return None
    except Exception:
        # Recovery is best-effort: report the failing FireWork instead of
        # raising. KeyboardInterrupt and SystemExit are deliberately not
        # caught, so an interrupted run is not reported as a failed launch.
        if not ignore_errors:
            traceback.print_exc()
        return m_launch.fw_id