def get_nodes(self):
    """Return a JSON response describing every node and its reserved RAM.

    Every document of the ``nodes`` collection is returned without its
    ``_id`` and extended by three fields:

    * ``reserved_ram`` -- sum of ``container_ram`` over all data and
      application containers assigned to the node whose state is not one
      of ``end_states()``.
    * ``active_data_containers`` -- the ``container_ram`` values of those
      data containers.
    * ``active_application_containers`` -- the ``container_ram`` values of
      those application containers.

    :return: the result of ``jsonify({'nodes': [...]})``.
    """
    def active_container_ram(collection_name, cluster_node):
        # RAM values of all containers on the node that have not ended yet.
        matches = self._mongo.db[collection_name].find(
            {'state': {'$nin': end_states()}, 'cluster_node': cluster_node},
            {'container_ram': 1},
        )
        return [doc['container_ram'] for doc in matches]

    node_fields = {
        'cluster_node': 1,
        'is_online': 1,
        'debug_info': 1,
        'total_ram': 1,
        'total_cpus': 1,
    }

    described = []
    for entry in self._mongo.db['nodes'].find({}, node_fields):
        # The database id is not part of the response (presumably because
        # it cannot be serialised by jsonify -- confirm).
        del entry['_id']
        cluster_node = entry['cluster_node']

        ac_ram = active_container_ram('application_containers', cluster_node)
        dc_ram = active_container_ram('data_containers', cluster_node)

        entry['reserved_ram'] = sum(dc_ram + ac_ram)
        entry['active_data_containers'] = dc_ram
        entry['active_application_containers'] = ac_ram
        described.append(entry)

    return jsonify({'nodes': described})
def _cron(self):
    """Periodically trigger scheduling while unfinished work exists.

    Runs forever. In every cycle the ``tasks``, ``application_containers``
    and ``data_containers`` collections are probed, in that order, for a
    document whose state is not one of ``end_states()``. If any is found,
    both the scheduling queue and the data container callback queue are
    signalled via ``_put``. The loop then sleeps for
    ``server_master['scheduling_interval_seconds']`` seconds.
    """
    watched_collections = ('tasks', 'application_containers', 'data_containers')

    while True:
        # any() stops at the first collection that still holds an
        # unfinished document, so later collections are only queried
        # when the earlier ones are idle.
        unfinished_work = any(
            self._mongo.db[name].find_one(
                {'state': {'$nin': end_states()}},
                {'_id': 1},
            )
            for name in watched_collections
        )

        if unfinished_work:
            _put(self._scheduling_q)
            _put(self._data_container_callback_q)

        sleep(self._config.server_master['scheduling_interval_seconds'])
def clean_up_unused_data_containers(self):
    """Finish and remove data containers no application container uses.

    While holding ``self._data_container_lock``, every data container in
    the ``processing`` state is checked. If no application container in a
    non-end state lists its id under ``data_container_ids``, the data
    container is transitioned to ``success`` and removed from its node
    through the cluster provider.
    """
    reason = 'Container removed. Not in use by any application container.'

    with self._data_container_lock:
        processing = self._mongo.db['data_containers'].find(
            {'state': state_to_index('processing')},
            {'_id': 1},
        )
        for doc in processing:
            dc_id = doc['_id']
            still_in_use = self._mongo.db['application_containers'].find_one(
                {
                    'state': {'$nin': end_states()},
                    'data_container_ids': dc_id,
                },
                {'_id': 1},
            )
            if not still_in_use:
                # State is updated first; only then is the node looked up
                # and the container removed.
                self._state_handler.transition(
                    'data_containers', dc_id, 'success', reason)
                node = self._lookup_node_name(dc_id, 'data_containers')
                self._cluster_provider.remove_container(node, dc_id)
def clean_up_containers(self):
    """Reconcile the cluster provider's containers with the database.

    Works in three passes:

    1. Entries of ``self._cluster_provider.containers()`` whose key cannot
       be converted to an ``ObjectId`` are dropped from the listing (the
       returned mapping is modified in place).
    2. For every remaining container that has a document in
       ``application_containers`` or ``data_containers``: if the document
       is in an end state the container is removed; otherwise, if the
       container reports a non-zero ``exit_status``, its logs are fetched
       on a best-effort basis, the document is transitioned to ``failed``
       and the container is removed.
    3. Every document in state ``1`` or ``2`` whose id is not in the
       listing is transitioned to ``failed`` ("Container vanished.").

    Only ``Exception`` subclasses are swallowed during id validation and
    log retrieval, so ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    collections = ('application_containers', 'data_containers')
    containers = self._cluster_provider.containers()

    # Iterate over a snapshot of the keys because entries are deleted.
    for key in list(containers):
        try:
            ObjectId(key)
        except Exception:
            # Not a valid ObjectId, so it cannot match any document.
            del containers[key]

    # Both values are loop invariant; compute them once.
    object_ids = [ObjectId(key) for key in containers]
    finished_states = end_states()

    for collection in collections:
        cursor = self._mongo.db[collection].find(
            {'_id': {'$in': object_ids}},
            {'state': 1},
        )
        for c in cursor:
            container_id = c['_id']
            container = containers[str(container_id)]
            node_name = self._lookup_node_name(container_id, collection)

            if c['state'] in finished_states:
                self._cluster_provider.remove_container(
                    node_name, container_id)
                continue

            # A missing/None exit status and 0 are both falsy, so only a
            # non-zero exit status is handled below.
            if not container.get('exit_status'):
                continue

            logs = 'container logs not available'
            try:
                logs = self._cluster_provider.logs_from_container(
                    node_name, container_id)
            except Exception:
                # Best effort: keep the placeholder text if logs cannot
                # be retrieved.
                pass

            description = 'Container exited unexpectedly ({}): {}'.format(
                container['description'], logs)
            self._state_handler.transition(
                collection, container_id, 'failed', description)
            self._cluster_provider.remove_container(node_name, container_id)

    for collection in collections:
        # NOTE(review): 1 and 2 are raw state indices -- presumably the
        # 'created' and 'processing' states; confirm against
        # state_to_index.
        cursor = self._mongo.db[collection].find(
            {'state': {'$in': [1, 2]}},
            {'_id': 1},
        )
        for c in cursor:
            if str(c['_id']) not in containers:
                description = 'Container vanished.'
                self._state_handler.transition(
                    collection, c['_id'], 'failed', description)
def schedule(self):
    """Create containers for selected tasks and place them on online nodes.

    First the free RAM of every online node is computed as ``total_ram``
    minus the ``container_ram`` of all application and data containers on
    that node that are not in an end state.

    Then, for each task yielded by ``self._task_selection``:

    * If ``_is_task_fitting`` rejects the task, it is transitioned to
      ``failed`` and the next task is considered.
    * Otherwise an application container document is inserted, and unless
      the task sets ``no_cache``, ``self._caching.apply`` is called for it.
    * The new application container and all data containers in state
      ``-1`` without a ``cluster_node`` are assigned to nodes picked by
      ``self._container_allocation``, largest RAM first.
    * If any assignment fails, all of these container documents are
      deleted and no further tasks are processed in this run. Otherwise
      every container is transitioned to ``created``.
    """
    dc_ram = self._config.defaults['data_container_description'][
        'container_ram']

    def active_container_ram(collection_name, cluster_node):
        # RAM values of all containers on the node that have not ended yet.
        matches = self._mongo.db[collection_name].find(
            {'state': {'$nin': end_states()}, 'cluster_node': cluster_node},
            {'container_ram': 1},
        )
        return [doc['container_ram'] for doc in matches]

    online_nodes = self._mongo.db['nodes'].find(
        {'is_online': True},
        {'cluster_node': 1, 'total_ram': 1},
    )
    nodes = {}
    for node in online_nodes:
        cluster_node = node['cluster_node']
        reserved_ac_ram = active_container_ram(
            'application_containers', cluster_node)
        reserved_dc_ram = active_container_ram(
            'data_containers', cluster_node)
        node['reserved_ram'] = sum(reserved_dc_ram + reserved_ac_ram)
        node['free_ram'] = node['total_ram'] - node['reserved_ram']
        nodes[cluster_node] = node

    for task in self._task_selection:
        ac_ram = task['application_container_description']['container_ram']
        skip_cache = task.get('no_cache')
        # Tasks that opt out of caching need no RAM for a data container.
        required_dc_ram = 0 if skip_cache else dc_ram

        if not _is_task_fitting(nodes, ac_ram, required_dc_ram):
            self._state_handler.transition(
                'tasks', task['_id'], 'failed',
                'Task is too large for cluster.')
            continue

        new_container = application_container_prototype(ac_ram)
        new_container['task_id'] = [task['_id']]
        new_container['username'] = task['username']
        ac_id = self._mongo.db['application_containers'].insert_one(
            new_container).inserted_id

        if not skip_cache:
            self._caching.apply(ac_id)

        # NOTE(review): state -1 presumably marks data containers that
        # were inserted but not yet created -- confirm.
        fresh_data_containers = self._mongo.db['data_containers'].find(
            {'state': -1},
            {'_id': 1, 'cluster_node': 1},
        )
        pending = [
            (dc_ram, dc['_id'], 'data_containers')
            for dc in fresh_data_containers
            if not dc['cluster_node']
        ]
        pending.append((ac_ram, ac_id, 'application_containers'))
        # Whole tuples are compared: largest RAM first, ties are broken
        # by id and then collection name.
        pending.sort(reverse=True)

        for ram, _id, collection in pending:
            node_name = self._container_allocation(nodes, ram)
            if not node_name:
                break
            self._mongo.db[collection].update_one(
                {'_id': _id},
                {'$set': {'cluster_node': node_name}},
            )
            nodes[node_name]['free_ram'] -= ram
        else:
            # Every container found a node.
            for _, _id, collection in pending:
                self._state_handler.transition(
                    collection, _id, 'created', 'Container created.')
            continue

        # At least one container could not be placed: discard all
        # containers of this attempt and stop scheduling for this run.
        for _, _id, collection in pending:
            self._mongo.db[collection].delete_one({'_id': _id})
        break