def check_orphaned():
    """
    Machines created in providers might be in an error state or some
    configuration in between may have prevented them to join Jenkins (or
    manually removed). This task will go through the nodes it knows about,
    make sure they exist in the provider and if so, remove them from the mita
    database and the provider.
    """
    conn = connections.jenkins_connection()
    nodes = models.Node.query.all()
    for node in nodes:
        # it is all good if this node exists in Jenkins. That is the whole
        # reason for its miserable existence, to work for Mr. Jenkins. Let it
        # be.
        if node.jenkins_name:
            if conn.node_exists(node.jenkins_name):
                continue
        # So this node is not in Jenkins. If it is less than 15 minutes then
        # don't do anything because it might be just taking a while to join.
        # ALERT MR ROBINSON: 15 minutes is a magical number.
        now = datetime.utcnow()
        difference = now - node.created
        # FIX: ``timedelta.seconds`` is only the seconds *component* (wraps
        # every 24 hours), so a node created a day ago could look "fresh".
        # ``total_seconds()`` is the real age of the node.
        if difference.total_seconds() > 900:  # magical number alert
            logger.info("found created node that didn't join Jenkins: %s", node)
            provider = providers.get(node.provider)
            # "We often miss opportunity because it's dressed in overalls and
            # looks like work". Node missed his opportunity here.
            try:
                provider.destroy_node(name=node.cloud_name)
            except Exception:
                # FIX: previously the database record was deleted even when
                # the provider call failed, leaving an orphaned VM running
                # with no record of it. Keep the row so the next scheduled
                # run can retry the destroy.
                logger.exception("unable to destroy node: %s", node.cloud_name)
                continue
            logger.info("removed useless node from provider and database: %s", node)
            node.delete()
            models.commit()
def connection(app, request): """Session-wide test database.""" # Connect and create the temporary database print "=" * 80 print "CREATING TEMPORARY DATABASE FOR TESTS" print "=" * 80 subprocess.call(['dropdb', DBNAME]) subprocess.call(['createdb', DBNAME]) # Bind and create the database tables _db.clear() engine_url = '%s/%s' % (BIND, DBNAME) db_engine = create_engine(engine_url, encoding='utf-8', poolclass=NullPool) # AKA models.start() _db.Session.bind = db_engine _db.metadata.bind = _db.Session.bind _db.Base.metadata.create_all(db_engine) _db.commit() _db.clear() #connection = db_engine.connect() def teardown(): _db.Base.metadata.drop_all(db_engine) request.addfinalizer(teardown) # Slap our test app on it _db.app = app return _db
def check_orphaned():
    """
    Machines created in providers might be in an error state or some
    configuration in between may have prevented them to join Jenkins (or
    manually removed). This task will go through the nodes it knows about,
    make sure they exist in the provider and if so, remove them from the mita
    database and the provider.
    """
    conn = connections.jenkins_connection()
    try:
        nodes = models.Node.query.all()
    except InvalidRequestError:
        logger.exception('could not list nodes')
        models.rollback()
        # we can try again at the next scheduled task run
        return
    for node in nodes:
        # it is all good if this node exists in Jenkins. That is the whole
        # reason for its miserable existence, to work for Mr. Jenkins. Let it
        # be.
        if node.jenkins_name:
            if conn.node_exists(node.jenkins_name):
                continue
        # So this node is not in Jenkins. If it is less than 15 minutes then
        # don't do anything because it might be just taking a while to join.
        # ALERT MR ROBINSON: 15 minutes is a magical number.
        now = datetime.utcnow()
        difference = now - node.created
        # FIX: ``timedelta.seconds`` wraps at 24 hours (it is only the
        # seconds component), so nodes older than a day could be skipped
        # forever. ``total_seconds()`` gives the true elapsed time.
        if difference.total_seconds() > 900:  # magical number alert
            logger.info("found created node that didn't join Jenkins: %s", node)
            provider = providers.get(node.provider)
            # "We often miss opportunity because it's dressed in overalls and
            # looks like work". Node missed his opportunity here.
            try:
                provider.destroy_node(name=node.cloud_name)
            except CloudNodeNotFound:
                logger.info("cloud was not found on provider: %s", node.cloud_name)
                logger.info(
                    "will remove node from database, API confirms it no longer exists"
                )
                node.delete()
                models.commit()
            except Exception:
                # destroy failed for an unknown reason: keep the DB record so
                # the next run can retry instead of orphaning the VM
                logger.exception("unable to destroy node: %s", node.cloud_name)
                logger.error("will skip database removal")
                continue
    # providers can purge nodes in error state too, try to prune those as well
    providers_conf = pecan.conf.provider.to_dict()
    for provider_name in providers_conf.keys():
        provider = providers.get(provider_name)
        provider.purge()
def delete_node(node_id):
    """Remove the node identified by ``node_id`` from its provider,
    from Jenkins, and finally from the database.

    A warning is logged (and nothing else happens) when the record has
    already disappeared by the time this async task runs.
    """
    record = models.Node.get(node_id)
    if not record:
        # the row vanished between scheduling and execution
        logger.warning('async node deletion could not be completed')
        logger.warning('%s node id no longer exists', node_id)
        return
    cloud_provider = providers.get(record.provider)
    util.delete_provider_node(cloud_provider, record.cloud_name)
    util.delete_jenkins_node(record.jenkins_name)
    record.delete()
    models.commit()
def delete_node(node_id):
    """Tear a node down everywhere: provider, Jenkins, and the database.

    Logs a warning and does nothing when the node record no longer exists.
    """
    node = models.Node.get(node_id)
    if node:
        util.delete_provider_node(
            providers.get(node.provider),
            node.cloud_name
        )
        util.delete_jenkins_node(node.jenkins_name)
        node.delete()
        models.commit()
        return
    # the record was removed before this async task got a chance to run
    logger.warning('async node deletion could not be completed')
    logger.warning('%s node id no longer exists', node_id)
def run(self, args):
    """Load the application environment and create all database tables.

    Starts a transaction, creates the schema from the model metadata, and
    commits on success; on any failure the transaction is rolled back and
    the exception re-raised.
    """
    super(PopulateCommand, self).run(args)
    out("LOADING ENVIRONMENT")
    self.load_app()
    out("BUILDING SCHEMA")
    try:
        out("STARTING A TRANSACTION...")
        models.start()
        models.Base.metadata.create_all(conf.sqlalchemy.engine)
    # NOTE(review): bare ``except`` also catches KeyboardInterrupt/SystemExit;
    # here it only rolls back and re-raises, so nothing is swallowed, but
    # ``except BaseException`` would state the intent explicitly.
    except:
        models.rollback()
        out("ROLLING BACK... ")
        raise
    else:
        out("COMMITING... ")
        models.commit()
def check_orphaned():
    """
    Machines created in providers might be in an error state or some
    configuration in between may have prevented them to join Jenkins (or
    manually removed). This task will go through the nodes it knows about,
    make sure they exist in the provider and if so, remove them from the mita
    database and the provider.
    """
    conn = connections.jenkins_connection()
    nodes = models.Node.query.all()
    for node in nodes:
        # it is all good if this node exists in Jenkins. That is the whole
        # reason for its miserable existence, to work for Mr. Jenkins. Let it
        # be.
        if node.jenkins_name:
            if conn.node_exists(node.jenkins_name):
                continue
        # So this node is not in Jenkins. If it is less than 15 minutes then
        # don't do anything because it might be just taking a while to join.
        # ALERT MR ROBINSON: 15 minutes is a magical number.
        now = datetime.utcnow()
        difference = now - node.created
        # FIX: ``timedelta.seconds`` wraps every 24 hours; use
        # ``total_seconds()`` so day-old nodes are not treated as fresh.
        if difference.total_seconds() > 900:  # magical number alert
            logger.info("found created node that didn't join Jenkins: %s", node)
            provider = providers.get(node.provider)
            # "We often miss opportunity because it's dressed in overalls and
            # looks like work". Node missed his opportunity here.
            try:
                provider.destroy_node(name=node.cloud_name)
            except Exception:
                logger.exception("unable to destroy node: %s", node.cloud_name)
                logger.error("will skip database removal")
                # FIX: this was ``return``, which aborted the whole task and
                # silently skipped every remaining node after one failure;
                # only this node should be skipped.
                continue
            logger.info("removed useless node from provider and database: %s", node)
            node.delete()
            models.commit()
def connection(app, request): """Session-wide test database.""" # Connect and create the temporary database print "=" * 80 print "CREATING TEMPORARY DATABASE FOR TESTS" print "=" * 80 subprocess.call(['dropdb', DBNAME]) subprocess.call(['createdb', DBNAME]) # Bind and create the database tables _db.clear() engine_url = '%s/%s' % (BIND, DBNAME) db_engine = create_engine( engine_url, encoding='utf-8', poolclass=NullPool) # AKA models.start() _db.Session.bind = db_engine _db.metadata.bind = _db.Session.bind _db.Base.metadata.create_all(db_engine) _db.commit() _db.clear() #connection = db_engine.connect() def teardown(): _db.Base.metadata.drop_all(db_engine) request.addfinalizer(teardown) # Slap our test app on it _db.app = app return _db
def index(self):
    """Create provider nodes so a stuck Jenkins job can get workers.

    Validates the request payload, checks for already-matching nodes and,
    unless enough matching nodes were created recently, creates a buffered
    number (3/4 of the requested count, rounded) of new nodes in the
    provider and records them in the database.
    """
    provider = providers.get(request.json['provider'])
    # request.json is read-only, since we are going to add extra metadata
    # to get the classes created, make a clean copy
    _json = deepcopy(request.json)
    # Before creating a node, check if it has already been created by us:
    name = _json['name']
    keyname = _json['keyname']
    image_name = _json['image_name']
    size = _json['size']
    labels = _json['labels']
    script = _json['script']
    count = _json.get('count', 1)
    # a buffered count is 3/4 what is needed, rounded
    buffered_count = int(round(count * 0.75))

    def create_nodes():
        # helper: create ``buffered_count`` nodes, slapping a fresh UUID
        # into each node's name and provisioning script, persisting each one
        # (extracted here because the original duplicated this loop twice)
        for _ in range(buffered_count):
            node_kwargs = deepcopy(request.json)
            _id = str(uuid.uuid4())
            node_kwargs['name'] = "%s__%s" % (name, _id)
            node_kwargs['script'] = script % _id
            provider.create_node(**node_kwargs)
            node_kwargs.pop('name')
            Node(
                name=name,
                identifier=_id,
                **node_kwargs
            )
            models.commit()

    existing_nodes = Node.filter_by(
        name=name,
        keyname=keyname,
        image_name=image_name,
        size=size,
    ).all()
    # try to slap it into the script, it is not OK if we are not allowed to, assume we should
    # this is just a validation step, should be taken care of by proper schema validation.
    try:
        script % '0000-aaaaa'
    except TypeError:
        logger.error('attempted to add a UUID to the script but failed')
        logger.error(
            'ensure that a formatting entry for %s["script"] exists, like: %%s' % name
        )
        return  # do not add anything if we haven't been able to format
    logger.info('checking if an existing node matches required labels: %s', str(labels))
    matching_nodes = [n for n in existing_nodes if n.labels_match(labels)]
    if not matching_nodes:
        # we don't have anything that matches this that has been ever created
        logger.info('job needs %s nodes to get unstuck', count)
        logger.info(
            'no matching nodes were found, will create new ones. count: %s',
            buffered_count
        )
        create_nodes()
    else:
        logger.info('found existing nodes that match labels: %s', len(matching_nodes))
        now = datetime.utcnow()
        # we have something that matches, go over all of them and check:
        # if *all of them* are over 6 (by default) minutes since creation.
        # that means that they are probably busy, so create a new one
        already_created_nodes = 0
        for n in matching_nodes:
            difference = now - n.created
            # FIX: ``timedelta.seconds`` wraps at 24h, so nodes created days
            # ago could be miscounted as recent; use total_seconds()
            if difference.total_seconds() < 360:  # 6 minutes
                already_created_nodes += 1
        # FIX: was ``>``, an off-by-one — exactly ``count`` recent nodes is
        # already enough (matches the sibling implementation of this handler)
        if already_created_nodes >= count:
            logger.info('job needs %s nodes to get unstuck', count)
            logger.info(
                'but there are %s node(s) already created 6 minutes ago',
                already_created_nodes
            )
            logger.info('will not create one')
            return
        logger.info('job needs %s nodes to get unstuck', count)
        logger.info(
            'no nodes created recently enough, will create new ones. count: %s',
            buffered_count
        )
        create_nodes()
def index(self):
    """Create provider nodes so a stuck Jenkins job can get workers.

    Validates the request payload, checks for already-matching nodes and,
    unless enough matching nodes were created recently, creates a buffered
    number (3/4 of the requested count, rounded) of new nodes in the
    provider and records them in the database.
    """
    provider = providers.get(request.json['provider'])
    # request.json is read-only, since we are going to add extra metadata
    # to get the classes created, make a clean copy
    _json = deepcopy(request.json)
    # Before creating a node, check if it has already been created by us:
    name = _json['name']
    keyname = _json['keyname']
    image_name = _json['image_name']
    size = _json['size']
    labels = _json['labels']
    script = _json['script']
    count = _json.get('count', 1)
    # a buffered count is 3/4 what is needed, rounded
    buffered_count = int(round(count * 0.75))

    def create_nodes():
        # helper: create ``buffered_count`` nodes, slapping a fresh UUID
        # into each node's name and provisioning script, persisting each one
        # (extracted here because the original duplicated this loop twice)
        for _ in range(buffered_count):
            node_kwargs = deepcopy(request.json)
            _id = str(uuid.uuid4())
            node_kwargs['name'] = "%s__%s" % (name, _id)
            node_kwargs['script'] = script % _id
            provider.create_node(**node_kwargs)
            node_kwargs.pop('name')
            Node(
                name=name,
                identifier=_id,
                **node_kwargs
            )
            models.commit()

    existing_nodes = Node.filter_by(
        name=name,
        keyname=keyname,
        image_name=image_name,
        size=size,
    ).all()
    # try to slap it into the script, it is not OK if we are not allowed to, assume we should
    # this is just a validation step, should be taken care of by proper schema validation.
    try:
        script % '0000-aaaaa'
    except TypeError:
        logger.error('attempted to add a UUID to the script but failed')
        logger.error(
            'ensure that a formatting entry for %s["script"] exists, like: %%s' % name
        )
        return  # do not add anything if we haven't been able to format
    logger.info('checking if an existing node matches required labels: %s', str(labels))
    matching_nodes = [n for n in existing_nodes if n.labels_match(labels)]
    if not matching_nodes:
        # we don't have anything that matches this that has been ever created
        logger.info('job needs %s nodes to get unstuck', count)
        logger.info(
            'no matching nodes were found, will create new ones. count: %s',
            buffered_count
        )
        create_nodes()
    else:
        logger.info('found existing nodes that match labels: %s', len(matching_nodes))
        now = datetime.utcnow()
        # we have something that matches, go over all of them and check:
        # if *all of them* are over 6 (by default) minutes since creation.
        # that means that they are probably busy, so create a new one
        already_created_nodes = 0
        for n in matching_nodes:
            difference = now - n.created
            # FIX: ``timedelta.seconds`` wraps at 24h, so nodes created days
            # ago could be miscounted as recent; use total_seconds()
            if difference.total_seconds() < 360:  # 6 minutes
                already_created_nodes += 1
        if already_created_nodes >= count:
            logger.info('job needs %s nodes to get unstuck', count)
            logger.info(
                'but there are %s node(s) already created 6 minutes ago',
                already_created_nodes
            )
            logger.info('will not create one')
            return
        logger.info('job needs %s nodes to get unstuck', count)
        logger.info(
            'no nodes created recently enough, will create new ones. count: %s',
            buffered_count
        )
        create_nodes()