def test_list_services(self):
        """
        list_services() must return exactly the services that were added
        to the collection, and nothing else
        """
        # nothing has been added yet
        self.assertEqual(len(scol.list_services()), 0, "services count should be 0")

        first = FakeService('1111', 's1')
        second = FakeService('2222', 's2')
        # this one is created but never added to the collection
        outsider = FakeService('3333', 's3')
        for added in (first, second):
            scol.add(added)

        listed = scol.list_services()
        self.assertEqual(len(listed), 2, "services count should be 2")
        self.assertIn(first, listed, 'service s1 should be in the returned service list')
        self.assertIn(second, listed, 'service s2 should be in the returned service list')
        self.assertNotIn(outsider, listed, 'service s3 should not be in the returned service list')
def _find_services_to_be_scheduled(actions):
    services_guids = []

    for action_item in actions:
        template_uid = None
        template = action_item.get("template")
        if template:
            template_uid = TemplateUID.parse(template)

        service = action_item.get("service")

        candidates = []

        kwargs = {"name": service}
        if template_uid:
            kwargs.update({
                "template_host": template_uid.host,
                "template_account": template_uid.account,
                "template_repo": template_uid.repo,
                "template_name": template_uid.name,
                "template_version": template_uid.version,
            })
        # filter out None value
        kwargs = {k: v for k, v in kwargs.items() if v is not None}

        if len(kwargs) > 0:
            candidates = scol.find(**kwargs)
        else:
            candidates = scol.list_services()

        services_guids.extend([s.guid for s in candidates])
    return services_guids
예제 #3
0
def monitor_robot_metrics():
    """
    Create the gauges exposing the metrics of the robot:
    the number of services and the memory used by 0-robot
    """
    def _count_services():
        return len(scol.list_services())

    # services
    services_gauge = Gauge("robot_services_total", "Number of services running")
    services_gauge.set_function(_count_services)

    # memory
    memory_gauge = Gauge('robot_total_memory_bytes', "Memory used by 0-robot")
    # NOTE(review): memory_usage_resource is called here, once, and its return
    # value is what gets registered - presumably it returns a callable; confirm
    memory_gauge.set_function(memory_usage_resource())
예제 #4
0
    def test_instantiate_service(self):
        """
        Instantiating two blueprints that use different templates must
        succeed and leave one service per template in the collection
        """
        node_template = 'github.com/threefoldtech/0-robot/node/0.0.1'
        vm_template = 'github.com/threefoldtech/0-robot/vm/0.0.1'
        services = [
            {'template': node_template, 'service': 'name'},
            {'template': vm_template, 'service': 'name'},
        ]

        _created, err_code, err_msg = instantiate_services(services)
        # no error must be reported
        assert err_code is None
        assert err_msg is None

        self.assertEqual(len(scol.list_services()), 2)
        # exactly one service exists for each of the two templates
        for template in (node_template, vm_template):
            found = scol.find(template_uid=template)
            self.assertEqual(len(found), 1)
예제 #5
0
def _trim_tasks(period=7200):  # default 2 hours ago
    """
    this greenlet deletes the tasks of all services that are older
    than 'period'
    This is to limit the amount of storage used to keep track of the tasks

    The loop runs forever and only returns when the greenlet is killed
    (gevent.GreenletExit). Any other error is logged and the loop goes on.

    :param period: age in seconds; the cut-off passed to delete_until() is
                   the current time minus this value
    """
    logger = j.logger.get('zerorobot')
    while True:
        try:
            # NOTE(review): time.sleep only yields to other greenlets when
            # gevent monkey patching is active - confirm this is the case
            time.sleep(20 * 60)  # runs every 20 minutes
            ago = int(time.time()) - period

            for service in scol.list_services():
                done = service.task_list._done
                # some task storage cannot be trimmed, skip those
                if not hasattr(done, 'delete_until'):
                    continue

                # don't need to trim old tasks if we have 50 or less tasks
                if done.count() <= 50:
                    continue
                # delete all tasks that have been created before ago
                done.delete_until(ago)

        except gevent.GreenletExit:
            # exit properly
            return
        except Exception:
            # only catch regular errors: a bare except would also swallow
            # KeyboardInterrupt and SystemExit and keep the loop alive
            logger.exception("error deleting old tasks")
예제 #6
0
파일: robot.py 프로젝트: albayck/0-robot
 def _save_services(self):
     """
     save every service of the collection, one after the other
     (serializes them on disk)
     """
     all_services = scol.list_services()
     for srv in all_services:
         # the greenlets attached to the service are stopped first,
         # only then the service is saved
         greenlets = srv.gl_mgr
         greenlets.stop_all()
         srv.save()
예제 #7
0
    def names(self):
        """
        Build a dictionary that contains all the services present on the local 0-robot

        key is the name of the service
        value is a Service

        If several services share a name, the last one listed wins.
        """
        by_name = {}
        for srv in scol.list_services():
            by_name[srv.name] = srv
        return by_name
예제 #8
0
def GetMetricsHandler():
    """
    Gather the cpu usage, the memory statistics and the number of services
    and return them through jsonify()
    """
    # mem_stats() is expected to yield exactly these six values, in this order
    active, total, cached, free, swap_total, swap_free = mem_stat.mem_stats()
    memory = {
        'total': total,
        'active': active,
        'free': free,
        'cached': cached,
        'swap_total': swap_total,
        'swap_free': swap_free,
    }

    cpu = cpu_stat.cpu_percents()
    service_count = len(scol.list_services())

    return jsonify({
        'cpu': cpu,
        'memory': memory,
        'nr_services': service_count,
    })
예제 #9
0
def load_services(data_dir):
    """
    Load into the service collection all the services stored under data_dir.

    Every directory found (recursively) under data_dir that contains a
    'service.yaml' file is loaded with the template class named in that file.
    Once everything is loaded, each service of the collection is validated;
    the services that fail validation get their 'executor' greenlet stopped
    and are handed over to _try_load_service in a new greenlet.

    :param data_dir: directory holding the services, created if missing
    :raises RuntimeError: when several versions of a template are found
                          while looking for a replacement version
    """
    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(data_dir, exist_ok=True)

    for srv_dir in j.sal.fs.listDirsInDir(data_dir, recursive=True):
        info_path = os.path.join(srv_dir, 'service.yaml')
        if not os.path.exists(info_path):
            # not a service directory
            continue
        service_info = j.data.serializer.yaml.load(info_path)

        tmpl_uid = TemplateUID.parse(service_info['template'])
        try:
            tmplClass = tcol.get(str(tmpl_uid))
        except tcol.TemplateNotFoundError:
            # template of the service not found, could be we have the template but not the same version
            # try to get the template without specifying the version
            tmplClasses = tcol.find(host=tmpl_uid.host, account=tmpl_uid.account, repo=tmpl_uid.repo, name=tmpl_uid.name)
            size = len(tmplClasses)
            if size > 1:
                raise RuntimeError("more then one template version found, this should never happens")
            elif size < 1:
                # if the template is not found, try to add the repo using the info of the service template uid
                url = "http://%s/%s/%s" % (tmpl_uid.host, tmpl_uid.account, tmpl_uid.repo)
                tcol.add_repo(url)
                tmplClass = tcol.get(service_info['template'])
            else:
                # template of another version found, use that version to load the service
                tmplClass = tmplClasses[0]

        scol.load(tmplClass, srv_dir)

    loading_failed = []
    for service in scol.list_services():
        try:
            service.validate()
        except Exception as err:
            logger.error("fail to load %s: %s" % (service.guid, str(err)))
            # the service is not going to process its task list until it can
            # execute validate() without problem
            service.gl_mgr.stop('executor')
            loading_failed.append(service)

    if loading_failed:
        gevent.spawn(_try_load_service, loading_failed)
예제 #10
0
    def test_instantiate_service_error(self):
        """
        When the creation of a service of a blueprint fails, the error must be
        reported and no service of that blueprint must remain in the collection
        """
        node_blueprint = {
            'template': 'github.com/threefoldtech/0-robot/node/0.0.1',
            'service': 'node1',
            'data': {},
        }
        failing_blueprint = {
            'template': 'github.com/threefoldtech/0-robot/validate/0.0.1',
            'service': 'name',
            'data': {},
        }
        services = [node_blueprint, failing_blueprint]

        service_created, err_code, err_msg = instantiate_services(services)
        assert err_code == 500
        assert err_msg == 'fail to create service name github.com/threefoldtech/0-robot/validate/0.0.1'
        # one service is reported as created before the failure
        assert len(service_created) == 1

        remaining = scol.list_services()
        assert len(remaining) == 0, "service created during a failed blueprint, should be deleted"
예제 #11
0
    def test_instantiate_service_error(self):
        """
        When the creation of a service of a blueprint fails, the error must be
        reported and no service of that blueprint must remain in the collection
        """
        node_blueprint = {
            'template': 'github.com/zero-os/0-robot/node/0.0.1',
            'service': 'node1',
            'data': {},
        }
        failing_blueprint = {
            'template': 'github.com/zero-os/0-robot/validate/0.0.1',
            'service': 'name',
            'data': {},
        }
        services = [node_blueprint, failing_blueprint]

        service_created, err_code, err_msg = instantiate_services(services)
        assert err_code == 500
        assert err_msg == 'required need to be specified in the data'
        # one service is reported as created before the failure
        assert len(service_created) == 1

        remaining = scol.list_services()
        assert len(remaining) == 0, "service created during a failed blueprint, should be deleted"
예제 #12
0
    def stop(self, timeout=30):
        """
        Shut the robot down, in this order:

        1. stop receiving requests on the REST API
        2. wait for all currently active requests to finish
        3. stop all services
        4. wait for all services to stop gracefully
        5. serialize the state of all services to disk
        6. signal that the process can exit

        :param timeout: timeout passed to the graceful stop of each service
        """
        log = j.logger.get('zerorobot')
        log.info('stopping robot')

        # cancel the signal handlers so that further signals
        # do not trigger the shutdown a second time
        for handler in self._sig_handler:
            handler.cancel()

        log.info("stop REST API")
        log.info("waiting request to finish")
        self._http.stop(timeout=10)
        self._addr = None

        log.info("waiting for services to stop")

        def _stop_one(srv):
            # a service failing to stop must not prevent the others from stopping
            try:
                srv._gracefull_stop(timeout=timeout)
            except Exception as err:
                log.warning(
                    'exception raised while waiting %s %s (%s) to finish: %s',
                    srv.template_uid.name, srv.name, srv.guid, err)

        workers = Pool(30)
        workers.map(_stop_one, scol.list_services())

        # no more requests are coming in at this point, all services can safely be saved
        self._save_services()

        # notify that the process can exit
        self._stop_event.set()
예제 #13
0
def load_services(config):
    """
    Load into the service collection all the services listed by the storage.

    Once everything is loaded, each service of the collection is validated;
    the services that fail validation get their 'executor' greenlet stopped
    and are handed over to _try_load_service in a new greenlet.

    :param config: not used by this function
    :raises RuntimeError: when several versions of a template are found
                          while looking for a replacement version
    """
    for details in storage.list():
        template_ref = details['service']['template']
        uid = TemplateUID.parse(template_ref)

        try:
            template_cls = tcol.get(str(uid))
        except tcol.TemplateNotFoundError:
            # the exact template is missing, we may have it in another version:
            # search again without specifying the version
            same_template = tcol.find(host=uid.host, account=uid.account, repo=uid.repo, name=uid.name)
            found = len(same_template)
            if found > 1:
                raise RuntimeError("more then one template version found, this should never happens")
            if found == 1:
                # another version of the template exists, load the service with it
                template_cls = same_template[0]
            else:
                # nothing found: add the repo described by the template uid and retry
                tcol.add_repo("http://%s/%s/%s" % (uid.host, uid.account, uid.repo))
                template_cls = tcol.get(template_ref)

        scol.load(template_cls, details)

    logger = j.logger.get('zerorobot')
    failed = []
    for srv in scol.list_services():
        try:
            srv.validate()
        except Exception as err:
            logger.error("fail to load %s: %s" % (srv.guid, str(err)))
            # until validate() passes, the service must not process its task list
            srv.gl_mgr.stop('executor')
            failed.append(srv)

    if failed:
        gevent.spawn(_try_load_service, failed)
예제 #14
0
    def _auto_push(self):
        """
        run a coroutine that saves all the services, then commits and pushes
        the data repository, at the provided interval
        meant to be run as gevent greenlet/coroutine, it loops forever

        NOTE(review): the interval used to be documented as minutes, but the
        wait passes self.interval directly as seconds (a variant multiplying
        it by 60 was disabled) - confirm the intended unit
        """
        while True:
            # the logger is optional: guard every use of it, not only some
            if self.logger is not None:
                self.logger.debug("waiting interval")
            gevent.sleep(seconds=self.interval)
            if self.logger is not None:
                self.logger.debug("saving services and pushing data repo")
            _load_ssh_key()

            # save all services
            for service in scol.list_services():
                service.save()

            git = j.clients.git.get(basedir=self.repo_dir)
            git.commit(message='zrobot sync', addremove=True)
            git.push()

            self.last_pushed = datetime.datetime.now()
def _schedule_action(action_item):
    template_uid = None
    template = action_item.get("template")
    if template:
        template_uid = TemplateUID.parse(template)

    service = action_item.get("service")
    action = action_item.get("action")
    args = action_item.get("args")
    if args and not isinstance(args, dict):
        raise TypeError("args should be a dict not %s" % type(args))

    candidates = []

    kwargs = {"name": service}
    if template_uid:
        kwargs.update({
            "template_host": template_uid.host,
            "template_account": template_uid.account,
            "template_repo": template_uid.repo,
            "template_name": template_uid.name,
            "template_version": template_uid.version,
        })
    # filter out None value
    kwargs = {k: v for k, v in kwargs.items() if v is not None}

    if len(kwargs) > 0:
        candidates = scol.find(**kwargs)
    else:
        candidates = scol.list_services()

    tasks = []
    for service in candidates:
        t = service.schedule_action(action, args=args)
        tasks.append((t, service))
    return tasks