示例#1
0
def check_orphaned():
    """
    Machines created in providers might be in an error state or some
    configuration in between may have prevented them to join Jenkins (or
    manually removed). This task will go through the nodes it knows about, make
    sure they exist in the provider and if so, remove them from the mita
    database and the provider.
    """
    conn = connections.jenkins_connection()
    try:
        nodes = models.Node.query.all()
    except InvalidRequestError:
        logger.exception('could not list nodes')
        models.rollback()
        # we can try again at the next scheduled task run
        return

    for node in nodes:
        # it is all good if this node exists in Jenkins. That is the whole
        # reason for its miserable existence, to work for Mr. Jenkins. Let it
        # be.
        if node.jenkins_name:
            if conn.node_exists(node.jenkins_name):
                continue
        # So this node is not in Jenkins. If it is less than 15 minutes then
        # don't do anything because it might be just taking a while to join.
        # ALERT MR ROBINSON: 15 minutes is a magical number.
        now = datetime.utcnow()
        difference = now - node.created
        # BUGFIX: timedelta.seconds ignores the days component, so a node
        # created over a day ago could look "young" forever; total_seconds()
        # measures the real elapsed time.
        if difference.total_seconds() > 900:  # magical number alert
            logger.info("found created node that didn't join Jenkins: %s",
                        node)
            provider = providers.get(node.provider)
            # "We often miss opportunity because it's dressed in overalls and
            # looks like work". Node missed his opportunity here.
            try:
                provider.destroy_node(name=node.cloud_name)
            except CloudNodeNotFound:
                logger.info("cloud was not found on provider: %s",
                            node.cloud_name)
                logger.info(
                    "will remove node from database, API confirms it no longer exists"
                )
                node.delete()
                models.commit()
            except Exception:
                logger.exception("unable to destroy node: %s", node.cloud_name)
                logger.error("will skip database removal")
                continue
            # NOTE(review): a successful destroy leaves the DB row in place;
            # the next run would then hit CloudNodeNotFound and clean it up.
            # Presumably intentional -- confirm before changing.

    # providers can purge nodes in error state too, try to prune those as well
    providers_conf = pecan.conf.provider.to_dict()
    for provider_name in providers_conf.keys():
        provider = providers.get(provider_name)
        provider.purge()
示例#2
0
def check_orphaned():
    """
    Machines created in providers might be in an error state or some
    configuration in between may have prevented them to join Jenkins (or
    manually removed). This task will go through the nodes it knows about, make
    sure they exist in the provider and if so, remove them from the mita
    database and the provider.
    """
    conn = connections.jenkins_connection()
    for node in models.Node.query.all():
        # A node registered in Jenkins is doing its job; leave it alone.
        if node.jenkins_name and conn.node_exists(node.jenkins_name):
            continue
        # Not (yet) in Jenkins: allow a 15 minute grace period before
        # treating the node as orphaned, it might just be slow to join.
        elapsed = datetime.utcnow() - node.created
        if elapsed.seconds <= 900:
            continue
        logger.info("found created node that didn't join Jenkins: %s", node)
        provider = providers.get(node.provider)
        # Destroy it in the cloud; failure is logged but does not stop the
        # database cleanup below (matching the original best-effort intent).
        try:
            provider.destroy_node(name=node.cloud_name)
        except Exception:
            logger.exception("unable to destroy node: %s", node.cloud_name)
        logger.info("removed useless node from provider and database: %s", node)
        node.delete()
        models.commit()
示例#3
0
文件: nodes.py 项目: ceph/mita
 def delete(self):
     """
     Remove this node, either right away or via the async task.

     POST-only. An optional JSON ``delay`` (presumably seconds, passed as
     the task countdown) defers the removal to ``delete_node``.
     """
     if request.method != 'POST':
         abort(405)
     if not self.node:
         abort(404)
     # XXX we need validation here
     # XXX WE REALLY NEED VALIDATION HERE
     try:
         delay = request.json.get('delay', 0)
     # simplejson's JSONDecodeError subclasses ValueError, which is also what
     # the builtin json handler raises when no JSON body is passed in.
     except ValueError:
         delay = 0
     if not delay:
         # immediate removal: provider first, then Jenkins, then our record
         delete_provider_node(providers.get(self.node.provider), self.node.cloud_name)
         delete_jenkins_node(self.node.jenkins_name)
         self.node.delete()
     else:
         # deferred: the async task re-resolves the node by id when it fires
         delete_node.apply_async((self.node.id,), countdown=delay)
示例#4
0
文件: nodes.py 项目: ceph/mita
    def idle(self):
        """
        perform a check on the status of the current node, verifying how long
        it has been idle (if at all) marking the current timestamp since idle
        and determine if the node needs to be terminated.
        """
        if not self.node:
            abort(404, 'could not find UUID: %s' % self.identifier)
        provider_for_node = conf.nodes[self.node.name]['provider']
        provider = providers.get(provider_for_node)
        if request.method != 'POST':
            abort(405)
        now = datetime.utcnow()
        if self.node.idle:
            # it was idle before so check how many seconds since it was lazy.
            # `idle` is a property that will only be true-ish if idle_since has
            # been set.
            difference = now - self.node.idle_since
            # BUGFIX: timedelta.seconds drops the days component, so a node
            # idle for e.g. one day and five minutes would never be reaped;
            # total_seconds() measures the full elapsed time.
            if difference.total_seconds() > 1200:  # 20 minutes
                # we need to terminate this couch potato
                logger.info("Destroying node: %s" % self.node.cloud_name)
                try:
                    provider.destroy_node(name=self.node.cloud_name)
                except CloudNodeNotFound:
                    logger.info("node does not exist in cloud provider")
                conn = jenkins_connection()
                if conn.node_exists(self.node.jenkins_name):
                    logger.info("Deleting node in jenkins: %s" % self.node.jenkins_name)
                    conn.delete_node(self.node.jenkins_name)
                # delete from our database
                self.node.delete()

        else:  # mark it as being idle
            self.node.idle_since = now
示例#5
0
 def delete(self):
     """
     Delete this node from the provider, Jenkins, and the database.

     POST-only. An optional JSON ``delay`` (presumably seconds, passed as
     the task countdown -- confirm against the task queue) defers deletion
     to the async ``delete_node`` task.
     """
     if request.method != 'POST':
         abort(405)
     if not self.node:
         abort(404)
     # XXX we need validation here
     # XXX WE REALLY NEED VALIDATION HERE
     try:
         delay = request.json.get('delay', 0)
     # simplejson.decoder.JSONDecodeError inherits from ValueError which is
     # the same that the builtin Python json handler will raise when no JSON
     # is passed in.
     except ValueError:
         delay = 0
     if delay:
         # deferred: the async task re-resolves the node by id when it runs
         delete_node.apply_async(
             (self.node.id,),
             countdown=delay)
     else:
         # immediate removal: provider first, then Jenkins, then our record
         delete_provider_node(
             providers.get(self.node.provider),
             self.node.cloud_name
         )
         delete_jenkins_node(self.node.jenkins_name)
         self.node.delete()
示例#6
0
    def idle(self):
        """
        perform a check on the status of the current node, verifying how long
        it has been idle (if at all) marking the current timestamp since idle
        and determine if the node needs to be terminated.
        """
        if not self.node:
            abort(404, 'could not find UUID: %s' % self.identifier)
        provider = providers.get(conf.nodes[self.node.name]['provider'])
        if request.method != 'POST':
            abort(405)
        now = datetime.utcnow()
        if not self.node.idle:
            # first time we catch it slacking: just record the timestamp
            self.node.idle_since = now
            return
        # `idle` is a property that is only true-ish once idle_since is set,
        # so measure how long this node has been lazy.
        idle_time = now - self.node.idle_since
        if idle_time.seconds > 600:  # 10 minutes
            # this couch potato has to go
            logger.info("Destroying node: %s" % self.node.cloud_name)
            try:
                provider.destroy_node(name=self.node.cloud_name)
            except CloudNodeNotFound:
                logger.info("node does not exist in cloud provider")
            conn = jenkins_connection()
            if conn.node_exists(self.node.jenkins_name):
                logger.info("Deleting node in jenkins: %s" % self.node.jenkins_name)
                conn.delete_node(self.node.jenkins_name)
            # delete from our database
            self.node.delete()
示例#7
0
 def status(self, **kw):
     """
     Report the provider status for this node.

     Read-only GET; the query args (``**kw``) are needed to pick the right
     node because the name alone is not good enough (there might be more
     than one node named 'centos6' for example).
     """
     provider = providers.get(request.json['provider'])
     raw_status = provider.node_status(self.node_name, **kw)
     # NOTE(review): NodeState is indexed twice (status -> state -> int);
     # presumably the mapping is bidirectional -- confirm before changing.
     state = NodeState[raw_status]
     return {'status': state, 'status_int': NodeState[state]}
示例#8
0
 def status(self, **kw):
     # Read-only GET: query args are needed to determine the right node
     # because the name alone is not good enough (we might have more than
     # one node named 'centos6' for example).
     provider = providers.get(request.json['provider'])
     status = provider.node_status(self.node_name, **kw)
     # NOTE(review): NodeState is indexed twice (status -> state -> int);
     # presumably it maps in both directions -- confirm before touching this.
     state = NodeState[status]
     state_int = NodeState[state]
     return {'status': state, 'status_int': state_int}
示例#9
0
def delete_node(node_id):
    """
    Async task: remove the node identified by ``node_id`` from its cloud
    provider, from Jenkins, and finally from the database.
    """
    node = models.Node.get(node_id)
    if not node:
        # the node vanished while this task sat in the queue; nothing to do
        logger.warning('async node deletion could not be completed')
        logger.warning('%s node id no longer exists', node_id)
        return

    provider = providers.get(node.provider)
    util.delete_provider_node(provider, node.cloud_name)
    util.delete_jenkins_node(node.jenkins_name)
    node.delete()
    models.commit()
示例#10
0
def delete_node(node_id):
    """
    Async task: remove the node identified by ``node_id`` from its cloud
    provider, from Jenkins, and finally from the database.
    """
    node = models.Node.get(node_id)
    if not node:
        # the node was removed while this task was queued; nothing to do
        logger.warning('async node deletion could not be completed')
        logger.warning('%s node id no longer exists', node_id)
        return

    util.delete_provider_node(
        providers.get(node.provider),
        node.cloud_name
    )
    util.delete_jenkins_node(node.jenkins_name)
    node.delete()
    models.commit()
示例#11
0
    def index(self):
        """
        Create a node from the recipe in ``request.json`` unless a node with
        matching labels already exists in the database.
        """
        provider = providers.get(request.json['provider'])
        # request.json is read-only, since we are going to add extra metadata
        # to get the classes created, make a clean copy
        _json = deepcopy(request.json)

        # Before creating a node, check if it has already been created by us:
        name = _json['name']
        keyname = _json['keyname']
        image_name = _json['image_name']
        size = _json['size']
        labels = _json['labels']
        script = _json['script']
        existing_nodes = Node.filter_by(
            name=name,
            keyname=keyname,
            image_name=image_name,
            size=size,
        ).all()

        matching_nodes = [n for n in existing_nodes if n.labels_match(labels)]
        if not matching_nodes:  # we don't have anything that matches this that has been ever created
            logger.info('requested node does not exist, will create one')
            # slap the UUID into the new node details
            _id = str(uuid.uuid4())
            logger.info('changing name: %s' % _json['name'])
            _json['name'] = "%s__%s" % (name, _id)
            logger.info('changed name into something else: %s' % _json['name'])
            # try to slap it into the script, it is not OK if we are not allowed to, assume we should
            try:
                _json['script'] = script % _id
            except TypeError:
                logger.error('attempted to add a UUID to the script but failed')
                logger.error('ensure that a formatting entry exists, like: %%s')
                return  # do not add anything if we haven't been able to format
            logger.warning('creating node with details: %s' % str(_json))
            provider.create_node(**_json)
            # the DB record keeps the plain name, not the UUID-suffixed one
            _json.pop('name')
            Node(
                name=request.json['name'],
                identifier=_id,
                **_json
            )
示例#12
0
 def delete(self):
     """
     Destroy this node in the cloud provider and remove it from the DB.

     POST-only; the provider name comes from the JSON body, which is also
     forwarded verbatim to ``destroy_node``.
     """
     if request.method != 'POST':
         abort(405)
     if not self.node:
         abort(404)
     provider = providers.get(request.json['provider'])
     try:
         # destroy from the cloud provider
         destroyed = provider.destroy_node(**request.json)
         if not destroyed:
             # FIXME: this needs to return a proper response, not just a 500
             # NOTE(review): abort() presumably raises, so this is caught by
             # the broad `except Exception` below and re-aborted with 500
             abort(500)
         # delete from the database
         self.node.delete()
     # FIXME: catch the exception from libcloud here
     except Exception:
         # find a way to REALLY sound the alarm because
         # if we can't delete it means that the user is going
         # to pay for a resource it should no longer exist
         abort(500)
示例#13
0
def check_orphaned():
    """
    Machines created in providers might be in an error state or some
    configuration in between may have prevented them to join Jenkins (or
    manually removed). This task will go through the nodes it knows about, make
    sure they exist in the provider and if so, remove them from the mita
    database and the provider.
    """
    conn = connections.jenkins_connection()
    nodes = models.Node.query.all()

    for node in nodes:
        # it is all good if this node exists in Jenkins. That is the whole
        # reason for its miserable existence, to work for Mr. Jenkins. Let it
        # be.
        if node.jenkins_name:
            if conn.node_exists(node.jenkins_name):
                continue
        # So this node is not in Jenkins. If it is less than 15 minutes then
        # don't do anything because it might be just taking a while to join.
        # ALERT MR ROBINSON: 15 minutes is a magical number.
        now = datetime.utcnow()
        difference = now - node.created
        # BUGFIX: timedelta.seconds ignores the days component, making
        # day-old nodes look "young"; total_seconds() measures real elapsed time
        if difference.total_seconds() > 900:  # magical number alert
            logger.info("found created node that didn't join Jenkins: %s",
                        node)
            provider = providers.get(node.provider)
            # "We often miss opportunity because it's dressed in overalls and
            # looks like work". Node missed his opportunity here.
            try:
                provider.destroy_node(name=node.cloud_name)
            except Exception:
                logger.exception("unable to destroy node: %s", node.cloud_name)
                logger.error("will skip database removal")
                # BUGFIX: this was `return`, which aborted the whole sweep on
                # the first failing node; `continue` skips only this node so
                # the remaining nodes still get checked
                continue
            logger.info("removed useless node from provider and database: %s",
                        node)
            node.delete()
            models.commit()
示例#14
0
文件: nodes.py 项目: ceph/mita
    def index(self):
        """
        Create nodes for a stuck Jenkins job.

        Reads the node recipe from ``request.json`` (name, keyname,
        image_name, size, labels, script, optional count) and creates a
        buffered amount of nodes unless enough matching nodes were created
        recently. The duplicated creation loop from both branches is now a
        single nested helper.
        """
        provider = providers.get(request.json['provider'])
        # request.json is read-only, since we are going to add extra metadata
        # to get the classes created, make a clean copy
        _json = deepcopy(request.json)

        # Before creating a node, check if it has already been created by us:
        name = _json['name']
        keyname = _json['keyname']
        image_name = _json['image_name']
        size = _json['size']
        labels = _json['labels']
        script = _json['script']
        count = _json.get('count', 1)
        # a buffered count is 3/4 what is needed rounded up
        buffered_count = int(round(count * 0.75))
        existing_nodes = Node.filter_by(
            name=name,
            keyname=keyname,
            image_name=image_name,
            size=size,
        ).all()

        # try to slap it into the script, it is not OK if we are not allowed to, assume we should
        # this is just a validation step, should be taken care of by proper schema validation.
        try:
            script % '0000-aaaaa'
        except TypeError:
            logger.error('attempted to add a UUID to the script but failed')
            logger.error(
                'ensure that a formatting entry for %s["script"] exists, like: %%s' % name
            )
            return  # do not add anything if we haven't been able to format

        def _create_nodes(how_many):
            # create `how_many` nodes in the provider, each tagged with a
            # fresh UUID, and record each one in the database
            for _ in range(how_many):
                node_kwargs = deepcopy(request.json)
                _id = str(uuid.uuid4())
                node_kwargs['name'] = "%s__%s" % (name, _id)
                node_kwargs['script'] = script % _id

                provider.create_node(**node_kwargs)
                # the DB record keeps the plain name, not the UUID-suffixed one
                node_kwargs.pop('name')
                Node(
                    name=name,
                    identifier=_id,
                    **node_kwargs
                )
                models.commit()

        logger.info('checking if an existing node matches required labels: %s', str(labels))
        matching_nodes = [n for n in existing_nodes if n.labels_match(labels)]
        if not matching_nodes:  # we don't have anything that matches this that has been ever created
            logger.info('job needs %s nodes to get unstuck', count)
            logger.info(
                'no matching nodes were found, will create new ones. count: %s',
                buffered_count
            )
            _create_nodes(buffered_count)
        else:
            logger.info('found existing nodes that match labels: %s', len(matching_nodes))
            now = datetime.utcnow()
            # we have something that matches, go over all of them and check:
            # if *all of them* are over 6 (by default) minutes since creation.
            # that means that they are probably busy, so create a new one
            already_created_nodes = 0
            for n in matching_nodes:
                difference = now - n.created
                if difference.seconds < 360:  # 6 minutes
                    already_created_nodes += 1
            # NOTE(review): a sibling version of this handler uses `>=` here;
            # confirm whether creating when recent == needed is intended
            if already_created_nodes > count:
                logger.info('job needs %s nodes to get unstuck', count)
                logger.info(
                    'but there are %s node(s) already created 6 minutes ago',
                    already_created_nodes
                )
                logger.info('will not create one')
                return
            logger.info('job needs %s nodes to get unstuck', count)
            logger.info(
                'no nodes created recently enough, will create new ones. count: %s',
                buffered_count
            )
            _create_nodes(buffered_count)
示例#15
0
    def index(self):
        """
        Create nodes for a stuck Jenkins job.

        Reads the node recipe from ``request.json`` (name, keyname,
        image_name, size, labels, script, optional count) and creates a
        buffered amount of nodes unless enough matching nodes were created
        within the last 6 minutes.
        """
        provider = providers.get(request.json['provider'])
        # request.json is read-only, since we are going to add extra metadata
        # to get the classes created, make a clean copy
        _json = deepcopy(request.json)

        # Before creating a node, check if it has already been created by us:
        name = _json['name']
        keyname = _json['keyname']
        image_name = _json['image_name']
        size = _json['size']
        labels = _json['labels']
        script = _json['script']
        count = _json.get('count', 1)
        # a buffered count is 3/4 what is needed rounded up
        buffered_count = int(round(count * 0.75))
        existing_nodes = Node.filter_by(
            name=name,
            keyname=keyname,
            image_name=image_name,
            size=size,
        ).all()

        # try to slap it into the script, it is not OK if we are not allowed to, assume we should
        # this is just a validation step, should be taken care of by proper schema validation.
        try:
            script % '0000-aaaaa'
        except TypeError:
            logger.error('attempted to add a UUID to the script but failed')
            logger.error(
                'ensure that a formatting entry for %s["script"] exists, like: %%s' % name
            )
            return  # do not add anything if we haven't been able to format

        logger.info('checking if an existing node matches required labels: %s', str(labels))
        matching_nodes = [n for n in existing_nodes if n.labels_match(labels)]
        if not matching_nodes:  # we don't have anything that matches this that has been ever created
            logger.info('job needs %s nodes to get unstuck', count)
            logger.info(
                'no matching nodes were found, will create new ones. count: %s',
                buffered_count
            )
            for i in range(buffered_count):
                # slap the UUID into the new node details
                node_kwargs = deepcopy(request.json)
                _id = str(uuid.uuid4())
                node_kwargs['name'] = "%s__%s" % (name, _id)
                node_kwargs['script'] = script % _id

                provider.create_node(**node_kwargs)
                # the DB record keeps the plain name, not the UUID-suffixed one
                node_kwargs.pop('name')
                Node(
                    name=name,
                    identifier=_id,
                    **node_kwargs
                )
                models.commit()
        else:
            logger.info('found existing nodes that match labels: %s', len(matching_nodes))
            now = datetime.utcnow()
            # we have something that matches, go over all of them and check:
            # if *all of them* are over 6 (by default) minutes since creation.
            # that means that they are probably busy, so create a new one
            already_created_nodes = 0
            for n in matching_nodes:
                difference = now - n.created
                if difference.seconds < 360:  # 6 minutes
                    already_created_nodes += 1
            if already_created_nodes >= count:
                logger.info('job needs %s nodes to get unstuck', count)
                logger.info(
                    'but there are %s node(s) already created 6 minutes ago',
                    already_created_nodes
                )
                logger.info('will not create one')
                return
            logger.info('job needs %s nodes to get unstuck', count)
            logger.info(
                'no nodes created recently enough, will create new ones. count: %s',
                buffered_count
            )
            for i in range(buffered_count):
                # slap the UUID into the new node details
                node_kwargs = deepcopy(request.json)
                _id = str(uuid.uuid4())
                node_kwargs['name'] = "%s__%s" % (name, _id)
                node_kwargs['script'] = script % _id

                provider.create_node(**node_kwargs)
                # the DB record keeps the plain name, not the UUID-suffixed one
                node_kwargs.pop('name')
                Node(
                    name=name,
                    identifier=_id,
                    **node_kwargs
                )
                models.commit()
示例#16
0
    def index(self):
        """
        Create a node from the recipe in ``request.json``, unless a node
        with matching labels was already created in the last 8 minutes.
        """
        provider = providers.get(request.json['provider'])
        # request.json is read-only; work on a mutable copy so the extra
        # metadata needed to create the node classes can be slapped in
        _json = deepcopy(request.json)

        name = _json['name']
        labels = _json['labels']
        script = _json['script']
        # look for nodes we may already have created from the same recipe
        existing_nodes = Node.filter_by(
            name=name,
            keyname=_json['keyname'],
            image_name=_json['image_name'],
            size=_json['size'],
        ).all()

        # tag the new node with a UUID so its provider name is unique
        _id = str(uuid.uuid4())
        _json['name'] = "%s__%s" % (name, _id)
        # the script must accept the UUID through a %s placeholder; bail out
        # if it does not, we must not create an untagged node
        try:
            _json['script'] = script % _id
        except TypeError:
            logger.error('attempted to add a UUID to the script but failed')
            logger.error(
                'ensure that a formatting entry for %s["script"] exists, like: %%s' % name
            )
            return  # do not add anything if we haven't been able to format

        def _create():
            # create in the provider and record it in our database; the DB
            # row keeps the plain name, not the UUID-suffixed one
            provider.create_node(**_json)
            _json.pop('name')
            Node(
                name=request.json['name'],
                identifier=_id,
                **_json
            )

        logger.info('checking if an existing node matches required labels: %s', str(labels))
        matching_nodes = [n for n in existing_nodes if n.labels_match(labels)]
        if not matching_nodes:
            # nothing like this was ever created, so make one now
            logger.info('no matching nodes were found, will create one')
            logger.warning('creating node with details: %s' % str(_json))
            _create()
        else:
            logger.info('found existing nodes that match labels: %s', len(matching_nodes))
            now = datetime.utcnow()
            # a match created within the last 8 minutes is probably still
            # coming up, so do not pile another node on top of it
            for n in matching_nodes:
                if (now - n.created).seconds < 480:  # 8 minutes
                    logger.info(
                        'a matching node was already created in the past 8 minutes: %s', n.name
                    )
                    logger.info('will not create one')
                    return
                    # FIXME: need to check with cloud provider and see if this
                    # node is running, otherwise it means this node is dead and
                    # should be removed from the DB
            logger.info('no nodes created recently, will create a new one')
            _create()