def test_list_services(self):
    """
    list_services() returns exactly the services that were added to the collection
    """
    self.assertEqual(len(scol.list_services()), 0, "services count should be 0")

    # two services get registered, the third one is deliberately left out
    registered = [FakeService('1111', 's1'), FakeService('2222', 's2')]
    outsider = FakeService('3333', 's3')
    for fake in registered:
        scol.add(fake)

    listed = scol.list_services()
    first, second = registered
    self.assertEqual(len(listed), 2, "services count should be 2")
    self.assertIn(first, listed, 'service s1 should be in the returned service list')
    self.assertIn(second, listed, 'service s2 should be in the returned service list')
    self.assertNotIn(outsider, listed, 'service s3 should not be in the returned service list')
def _find_services_to_be_scheduled(actions): services_guids = [] for action_item in actions: template_uid = None template = action_item.get("template") if template: template_uid = TemplateUID.parse(template) service = action_item.get("service") candidates = [] kwargs = {"name": service} if template_uid: kwargs.update({ "template_host": template_uid.host, "template_account": template_uid.account, "template_repo": template_uid.repo, "template_name": template_uid.name, "template_version": template_uid.version, }) # filter out None value kwargs = {k: v for k, v in kwargs.items() if v is not None} if len(kwargs) > 0: candidates = scol.find(**kwargs) else: candidates = scol.list_services() services_guids.extend([s.guid for s in candidates]) return services_guids
def monitor_robot_metrics():
    """
    Create the gauges that expose the robot metrics.

    - robot_services_total: number of services in the service collection
    - robot_total_memory_bytes: memory used by 0-robot
    """
    def count_services():
        return len(scol.list_services())

    # services
    services_gauge = Gauge("robot_services_total", "Number of services running")
    services_gauge.set_function(count_services)

    # memory
    memory_gauge = Gauge('robot_total_memory_bytes', "Memory used by 0-robot")
    # NOTE(review): memory_usage_resource is *called* here and its return value
    # is what gets registered; this presumes it returns a callable - confirm
    memory_sampler = memory_usage_resource()
    memory_gauge.set_function(memory_sampler)
def test_instantiate_service(self):
    """
    instantiating two services of different templates creates both of them without error
    """
    node_uid = 'github.com/threefoldtech/0-robot/node/0.0.1'
    vm_uid = 'github.com/threefoldtech/0-robot/vm/0.0.1'
    services = [{'template': uid, 'service': 'name'} for uid in (node_uid, vm_uid)]

    service_created, err_code, err_msg = instantiate_services(services)
    assert err_code is None
    assert err_msg is None

    self.assertEqual(len(scol.list_services()), 2)
    # exactly one service must exist for each template
    for uid in (node_uid, vm_uid):
        found = scol.find(template_uid=uid)
        self.assertEqual(len(found), 1)
def _trim_tasks(period=7200):  # default: 2 hours
    """
    this greenlet deletes the tasks of all services that are older than 'period'
    This is to limit the amount of storage used to keep track of the tasks

    The loop never ends by itself: it sleeps 20 minutes, trims, and starts again.
    It only returns when gevent.GreenletExit is raised inside it.

    :param period: age in seconds; done tasks created before `now - period` are deleted
    """
    logger = j.logger.get('zerorobot')
    while True:
        try:
            # NOTE(review): time.sleep only yields to other greenlets when
            # gevent monkey patching is active - confirm this is the case
            time.sleep(20 * 60)  # runs every 20 minutes
            ago = int(time.time()) - period
            for service in scol.list_services():
                done = service.task_list._done
                # some task storages cannot be trimmed, skip them
                if not hasattr(done, 'delete_until'):
                    continue
                # don't need to trim old task if we have 50 or less tasks
                if done.count() <= 50:
                    continue
                # delete all task that have been created before ago
                done.delete_until(ago)
        except gevent.GreenletExit:
            # exit properly
            return
        except Exception:
            # only swallow regular errors: a bare except would also trap
            # KeyboardInterrupt/SystemExit and keep looping forever
            logger.exception("error deleting old tasks")
            continue
def _save_services(self):
    """
    save every service of the collection

    the greenlets attached to a service are all stopped before the service is saved
    """
    all_services = scol.list_services()
    for svc in all_services:
        svc.gl_mgr.stop_all()
        svc.save()
def names(self):
    """
    Build a dictionary of all the services present in the local service collection

    key is the name of the service
    value is the service object itself
    if several services share a name, the last one listed wins
    """
    by_name = {}
    for svc in scol.list_services():
        by_name[svc.name] = svc
    return by_name
def GetMetricsHandler():
    """
    Return the cpu usage, the memory statistics and the number of services, serialized with jsonify
    """
    # mem_stats() yields its six values in this exact order
    active, total, cached, free, swap_total, swap_free = mem_stat.mem_stats()

    memory = {
        'total': total,
        'active': active,
        'free': free,
        'cached': cached,
        'swap_total': swap_total,
        'swap_free': swap_free,
    }
    cpu = cpu_stat.cpu_percents()
    service_count = len(scol.list_services())

    return jsonify({
        'cpu': cpu,
        'memory': memory,
        'nr_services': service_count,
    })
def load_services(data_dir):
    """
    Load into the service collection every service found under data_dir.

    data_dir is created when it does not exist. Every directory below it that
    holds a 'service.yaml' file is loaded with the template class named in that
    file. When this exact template is not known:
    - if one other version of the template is available, it is used instead
    - if no version is available, the repository deduced from the template uid
      is added to the template collection and the template is fetched again
    - if several versions are available, RuntimeError is raised

    Once everything is loaded, validate() is called on all the services of the
    collection. For a service that fails, the 'executor' greenlet is stopped and
    the service is handed over to _try_load_service in a new greenlet.

    :param data_dir: directory containing the service directories
    """
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    for srv_dir in j.sal.fs.listDirsInDir(data_dir, recursive=True):
        info_path = os.path.join(srv_dir, 'service.yaml')
        if not os.path.exists(info_path):
            # not a service directory
            continue

        service_info = j.data.serializer.yaml.load(info_path)
        tmpl_uid = TemplateUID.parse(service_info['template'])

        try:
            tmplClass = tcol.get(str(tmpl_uid))
        except tcol.TemplateNotFoundError:
            # the exact template is unknown, maybe another version of it is available:
            # search again without specifying the version
            other_versions = tcol.find(
                host=tmpl_uid.host,
                account=tmpl_uid.account,
                repo=tmpl_uid.repo,
                name=tmpl_uid.name)
            count = len(other_versions)
            if count > 1:
                raise RuntimeError("more then one template version found, this should never happens")
            if count == 1:
                # template of another version found, use it to load the service
                tmplClass = other_versions[0]
            else:
                # nothing found, add the repo described by the template uid and retry
                url = "http://%s/%s/%s" % (tmpl_uid.host, tmpl_uid.account, tmpl_uid.repo)
                tcol.add_repo(url)
                tmplClass = tcol.get(service_info['template'])

        scol.load(tmplClass, srv_dir)

    failed = []
    for svc in scol.list_services():
        try:
            svc.validate()
        except Exception as err:
            logger.error("fail to load %s: %s" % (svc.guid, str(err)))
            # the service is not going to process its task list until it can
            # execute validate() without problem
            svc.gl_mgr.stop('executor')
            failed.append(svc)

    if failed:
        gevent.spawn(_try_load_service, failed)
def test_instantiate_service_error(self):
    """
    when a service of a blueprint cannot be created, an error is reported
    and no service is left in the collection
    """
    node_item = {
        'template': 'github.com/threefoldtech/0-robot/node/0.0.1',
        'service': 'node1',
        'data': {},
    }
    # this is the service whose creation is expected to fail
    validate_item = {
        'template': 'github.com/threefoldtech/0-robot/validate/0.0.1',
        'service': 'name',
        'data': {},
    }
    services = [node_item, validate_item]

    service_created, err_code, err_msg = instantiate_services(services)

    assert err_code == 500
    assert err_msg == 'fail to create service name github.com/threefoldtech/0-robot/validate/0.0.1'
    assert len(service_created) == 1
    leftover = scol.list_services()
    assert len(leftover) == 0, "service created during a failed blueprint, should be deleted"
def test_instantiate_service_error(self):
    """
    when a service of a blueprint cannot be created, an error is reported
    and no service is left in the collection
    """
    node_item = {
        'template': 'github.com/zero-os/0-robot/node/0.0.1',
        'service': 'node1',
        'data': {},
    }
    # this is the service whose creation is expected to fail
    validate_item = {
        'template': 'github.com/zero-os/0-robot/validate/0.0.1',
        'service': 'name',
        'data': {},
    }
    services = [node_item, validate_item]

    service_created, err_code, err_msg = instantiate_services(services)

    assert err_code == 500
    assert err_msg == 'required need to be specified in the data'
    assert len(service_created) == 1
    leftover = scol.list_services()
    assert len(leftover) == 0, "service created during a failed blueprint, should be deleted"
def stop(self, timeout=30):
    """
    Shut the robot down, in this order:

    1. cancel the signal handlers
    2. stop the REST API, giving the active requests 10 seconds to finish
    3. ask all the services to stop gracefully, 30 at a time
    4. save all the services
    5. set the stop event so the process can exit

    :param timeout: timeout passed to the graceful stop of each service
                    (the REST API always uses its own 10 seconds timeout)
    """
    log = j.logger.get('zerorobot')
    log.info('stopping robot')

    # cancel the handlers so that receiving more signals
    # does not trigger the stop sequence again
    for handler in self._sig_handler:
        handler.cancel()

    log.info("stop REST API")
    log.info("waiting request to finish")
    self._http.stop(timeout=10)
    self._addr = None

    def stop_service(service):
        # a service that fails to stop must not prevent the others from stopping
        try:
            service._gracefull_stop(timeout=timeout)
        except Exception as err:
            log.warning(
                'exception raised while waiting %s %s (%s) to finish: %s',
                service.template_uid.name,
                service.name,
                service.guid,
                err)

    log.info("waiting for services to stop")
    workers = Pool(30)
    workers.map(stop_service, scol.list_services())

    # the REST API is stopped, so it is now safe to save all the services
    self._save_services()

    # notify that the process can exit
    self._stop_event.set()
def load_services(config):
    """
    Load into the service collection every service listed by the storage.

    Each service is loaded with the template class named in its details. When
    this exact template is not known:
    - if one other version of the template is available, it is used instead
    - if no version is available, the repository deduced from the template uid
      is added to the template collection and the template is fetched again
    - if several versions are available, RuntimeError is raised

    Once everything is loaded, validate() is called on all the services of the
    collection. For a service that fails, the 'executor' greenlet is stopped and
    the service is handed over to _try_load_service in a new greenlet.

    :param config: not used by this function
    """
    for service_details in storage.list():
        template = service_details['service']['template']
        tmpl_uid = TemplateUID.parse(template)

        try:
            tmplClass = tcol.get(str(tmpl_uid))
        except tcol.TemplateNotFoundError:
            # the exact template is unknown, maybe another version of it is available:
            # search again without specifying the version
            other_versions = tcol.find(
                host=tmpl_uid.host,
                account=tmpl_uid.account,
                repo=tmpl_uid.repo,
                name=tmpl_uid.name)
            count = len(other_versions)
            if count > 1:
                raise RuntimeError("more then one template version found, this should never happens")
            if count == 1:
                # template of another version found, use it to load the service
                tmplClass = other_versions[0]
            else:
                # nothing found, add the repo described by the template uid and retry
                url = "http://%s/%s/%s" % (tmpl_uid.host, tmpl_uid.account, tmpl_uid.repo)
                tcol.add_repo(url)
                tmplClass = tcol.get(template)

        scol.load(tmplClass, service_details)

    failed = []
    logger = j.logger.get('zerorobot')
    for svc in scol.list_services():
        try:
            svc.validate()
        except Exception as err:
            logger.error("fail to load %s: %s" % (svc.guid, str(err)))
            # the service is not going to process its task list until it can
            # execute validate() without problem
            svc.gl_mgr.stop('executor')
            failed.append(svc)

    if failed:
        gevent.spawn(_try_load_service, failed)
def _auto_push(self):
    """
    run a coroutine that saves all the services and pushes the repository
    at the provided interval

    Each round: sleep for self.interval, load the ssh key, save every service,
    commit and push the git repository located at self.repo_dir, then record
    the time of the push in self.last_pushed.

    meant to be run as gevent greenlet/coroutine, it never returns by itself

    NOTE(review): self.interval is handed to gevent.sleep as a number of
    seconds. The former documentation (and a disabled `self.interval*60`)
    described the interval as minutes - confirm which unit is intended.
    """
    while True:
        # the logger can be None, so every use of it has to be guarded
        if self.logger is not None:
            self.logger.debug("waiting interval")
        gevent.sleep(seconds=self.interval)
        if self.logger is not None:
            self.logger.debug("saving services and pushing data repo")
        _load_ssh_key()

        # save all services
        for service in scol.list_services():
            service.save()

        git = j.clients.git.get(basedir=self.repo_dir)
        git.commit(message='zrobot sync', addremove=True)
        git.push()
        self.last_pushed = datetime.datetime.now()
def _schedule_action(action_item): template_uid = None template = action_item.get("template") if template: template_uid = TemplateUID.parse(template) service = action_item.get("service") action = action_item.get("action") args = action_item.get("args") if args and not isinstance(args, dict): raise TypeError("args should be a dict not %s" % type(args)) candidates = [] kwargs = {"name": service} if template_uid: kwargs.update({ "template_host": template_uid.host, "template_account": template_uid.account, "template_repo": template_uid.repo, "template_name": template_uid.name, "template_version": template_uid.version, }) # filter out None value kwargs = {k: v for k, v in kwargs.items() if v is not None} if len(kwargs) > 0: candidates = scol.find(**kwargs) else: candidates = scol.list_services() tasks = [] for service in candidates: t = service.schedule_action(action, args=args) tasks.append((t, service)) return tasks