def get_nodes(self):
        """Return a JSON response describing every node and its RAM reservations.

        For each document in the ``nodes`` collection, the containers assigned
        to that node whose state is not one of ``end_states()`` are looked up
        in ``application_containers`` and ``data_containers``. The node
        document (without its ``_id``) is extended with:

        * ``reserved_ram`` - sum of ``container_ram`` over all those containers
        * ``active_data_containers`` - list of ``container_ram`` values of the
          matching data containers
        * ``active_application_containers`` - list of ``container_ram`` values
          of the matching application containers

        :return: result of ``jsonify({'nodes': [...]})``
        """
        db = self._mongo.db

        def active_containers_on(collection_name, cluster_node):
            # Only the RAM figure is needed, so restrict the projection to it.
            return list(db[collection_name].find(
                {'state': {'$nin': end_states()}, 'cluster_node': cluster_node},
                {'container_ram': 1}
            ))

        wanted_fields = {
            'cluster_node': 1,
            'is_online': 1,
            'debug_info': 1,
            'total_ram': 1,
            'total_cpus': 1
        }

        described_nodes = []
        for entry in db['nodes'].find({}, wanted_fields):
            # Drop the Mongo identifier before the document is serialised.
            entry.pop('_id')
            name = entry['cluster_node']

            ac_docs = active_containers_on('application_containers', name)
            dc_docs = active_containers_on('data_containers', name)

            dc_ram_values = [doc['container_ram'] for doc in dc_docs]
            ac_ram_values = [doc['container_ram'] for doc in ac_docs]

            entry.update({
                'reserved_ram': sum(dc_ram_values + ac_ram_values),
                'active_data_containers': dc_ram_values,
                'active_application_containers': ac_ram_values
            })
            described_nodes.append(entry)

        return jsonify({'nodes': described_nodes})
示例#2
0
    def _cron(self):
        """Periodically trigger the scheduling and data container callback queues.

        Runs forever. On every iteration the ``tasks``,
        ``application_containers`` and ``data_containers`` collections are
        probed, in that order, for a document whose state is not one of
        ``end_states()``. Probing stops at the first hit. If any such document
        exists, ``_put`` is called on ``self._scheduling_q`` and on
        ``self._data_container_callback_q``. The loop then sleeps for
        ``scheduling_interval_seconds`` as configured in
        ``self._config.server_master``.
        """
        watched_collections = ('tasks', 'application_containers', 'data_containers')

        while True:
            # any() short-circuits, so later collections are only queried
            # when the earlier ones hold no unfinished document.
            unfinished_work = any(
                self._mongo.db[name].find_one(
                    {'state': {'$nin': end_states()}},
                    {'_id': 1}
                )
                for name in watched_collections
            )

            if unfinished_work:
                _put(self._scheduling_q)
                _put(self._data_container_callback_q)

            sleep(self._config.server_master['scheduling_interval_seconds'])
示例#3
0
    def clean_up_unused_data_containers(self):
        """Retire data containers that no unfinished application container uses.

        While holding ``self._data_container_lock``, every data container in
        the ``processing`` state is checked against ``application_containers``:
        if no application container with a state outside ``end_states()`` lists
        the data container in ``data_container_ids``, the data container is
        transitioned to ``success`` and removed via the cluster provider.
        """
        with self._data_container_lock:
            processing_dcs = self._mongo.db['data_containers'].find(
                {'state': state_to_index('processing')},
                {'_id': 1}
            )

            for dc in processing_dcs:
                dc_id = dc['_id']

                still_referenced = self._mongo.db['application_containers'].find_one(
                    {
                        'state': {'$nin': end_states()},
                        'data_container_ids': dc_id
                    },
                    {'_id': 1}
                )

                if not still_referenced:
                    self._state_handler.transition(
                        'data_containers',
                        dc_id,
                        'success',
                        'Container removed. Not in use by any application container.'
                    )
                    # The node is looked up after the state transition, then the
                    # container is removed from that node.
                    hosting_node = self._lookup_node_name(dc_id, 'data_containers')
                    self._cluster_provider.remove_container(hosting_node, dc_id)
示例#4
0
    def clean_up_containers(self):
        """Reconcile containers reported by the cluster provider with the database.

        Three passes are performed:

        1. Entries of ``self._cluster_provider.containers()`` whose key is not
           a valid ``ObjectId`` are dropped from the mapping, as they cannot
           correspond to a database document.
        2. For every remaining container that has a document in
           ``application_containers`` or ``data_containers``:

           * if the document's state is in ``end_states()``, the container is
             removed from its node;
           * otherwise, if the provider reports a truthy (i.e. non-zero)
             ``exit_status``, the document is transitioned to ``failed`` with
             the container logs (when retrievable) in the description, and the
             container is removed.
        3. Every document in state ``1`` or ``2`` that has no corresponding
           container in the provider's mapping is transitioned to ``failed``
           with the description ``'Container vanished.'``.
        """
        containers = self._cluster_provider.containers()

        # Discard provider entries whose name cannot be an ObjectId. Only
        # ordinary errors are treated as "invalid name"; KeyboardInterrupt and
        # SystemExit must propagate instead of being swallowed.
        for key in list(containers):
            try:
                ObjectId(key)
            except Exception:
                del containers[key]

        # The id list does not change between collections, so build it once.
        container_ids = [ObjectId(key) for key in containers]

        for collection in ['application_containers', 'data_containers']:
            cursor = self._mongo.db[collection].find(
                {'_id': {'$in': container_ids}}, {'state': 1})
            for c in cursor:
                container = containers[str(c['_id'])]
                node_name = self._lookup_node_name(c['_id'], collection)

                if c['state'] in end_states():
                    self._cluster_provider.remove_container(
                        node_name, c['_id'])
                elif container.get('exit_status'):
                    # A truthy exit status is necessarily non-zero. A missing,
                    # None or 0 exit status is left untouched.
                    logs = 'container logs not available'
                    try:
                        logs = self._cluster_provider.logs_from_container(
                            node_name, c['_id'])
                    except Exception:
                        # Best effort: keep the placeholder text when the logs
                        # cannot be fetched, but let interpreter-exit signals
                        # through.
                        pass
                    description = 'Container exited unexpectedly ({}): {}'.format(
                        container['description'], logs)
                    self._state_handler.transition(collection, c['_id'],
                                                   'failed', description)
                    self._cluster_provider.remove_container(
                        node_name, c['_id'])

        for collection in ['application_containers', 'data_containers']:
            # NOTE(review): 1 and 2 are presumably the indices of the states
            # in which a container is expected to exist on a node - confirm
            # against the state table (cf. state_to_index).
            cursor = self._mongo.db[collection].find(
                {'state': {'$in': [1, 2]}}, {'_id': 1})
            for c in cursor:
                if str(c['_id']) not in containers:
                    self._state_handler.transition(collection, c['_id'],
                                                   'failed',
                                                   'Container vanished.')
示例#5
0
    def schedule(self):
        """Create and place containers for the tasks in ``self._task_selection``.

        First, a RAM overview of all online nodes is built: for each node the
        ``container_ram`` of its application and data containers with a state
        outside ``end_states()`` is summed into ``reserved_ram``, and
        ``free_ram`` is derived from ``total_ram``.

        Then, for every selected task:

        * If ``_is_task_fitting`` rejects the task, it is transitioned to
          ``failed`` and the next task is processed.
        * Otherwise an application container document is inserted and, unless
          the task sets ``no_cache``, ``self._caching.apply`` is called with
          its id.
        * All data containers in state ``-1`` without a ``cluster_node`` plus
          the new application container are assigned to nodes, largest RAM
          first, via ``self._container_allocation``.
        * If any assignment yields no node, all of these containers are
          deleted from the database and scheduling stops for the remaining
          tasks. Otherwise every container is transitioned to ``created``.
        """
        dc_ram = self._config.defaults['data_container_description']['container_ram']

        online_nodes = self._mongo.db['nodes'].find(
            {'is_online': True},
            {'cluster_node': 1, 'total_ram': 1}
        )

        # RAM bookkeeping per online node, keyed by node name.
        nodes = {}
        for node in online_nodes:
            node_name = node['cluster_node']

            active_acs = list(self._mongo.db['application_containers'].find(
                {'state': {'$nin': end_states()}, 'cluster_node': node_name},
                {'container_ram': 1}
            ))
            active_dcs = list(self._mongo.db['data_containers'].find(
                {'state': {'$nin': end_states()}, 'cluster_node': node_name},
                {'container_ram': 1}
            ))

            dc_reservations = [doc['container_ram'] for doc in active_dcs]
            ac_reservations = [doc['container_ram'] for doc in active_acs]

            reserved = sum(dc_reservations + ac_reservations)
            node['reserved_ram'] = reserved
            node['free_ram'] = node['total_ram'] - reserved

            nodes[node_name] = node

        for task in self._task_selection:
            ac_ram = task['application_container_description']['container_ram']
            use_cache = not task.get('no_cache')
            # Tasks that opt out of caching need no RAM for a data container.
            required_dc_ram = dc_ram if use_cache else 0

            if not _is_task_fitting(nodes, ac_ram, required_dc_ram):
                self._state_handler.transition(
                    'tasks', task['_id'], 'failed',
                    'Task is too large for cluster.')
                continue

            new_ac = application_container_prototype(ac_ram)
            new_ac['task_id'] = [task['_id']]
            new_ac['username'] = task['username']
            insert_result = self._mongo.db['application_containers'].insert_one(new_ac)
            new_ac_id = insert_result.inserted_id

            if use_cache:
                self._caching.apply(new_ac_id)

            # NOTE(review): state -1 is presumably the initial, not yet
            # created state - confirm against the state table.
            pending_dcs = self._mongo.db['data_containers'].find(
                {'state': -1},
                {'_id': 1, 'cluster_node': 1}
            )

            placements = [
                (dc_ram, dc['_id'], 'data_containers')
                for dc in pending_dcs
                if not dc['cluster_node']
            ]
            placements.append((ac_ram, new_ac_id, 'application_containers'))
            # Place the most RAM-hungry containers first.
            placements.sort(reverse=True)

            for ram, container_id, collection in placements:
                target_node = self._container_allocation(nodes, ram)
                if not target_node:
                    # No node could be allocated: fall through to the rollback.
                    break
                self._mongo.db[collection].update_one(
                    {'_id': container_id},
                    {'$set': {'cluster_node': target_node}}
                )
                nodes[target_node]['free_ram'] -= ram
            else:
                # Every container got a node: mark them all as created.
                for _ram, container_id, collection in placements:
                    self._state_handler.transition(
                        collection, container_id, 'created',
                        'Container created.')
                continue

            # Rollback: drop all containers of this attempt and stop
            # scheduling the remaining tasks.
            for _ram, container_id, collection in placements:
                self._mongo.db[collection].delete_one({'_id': container_id})
            break