def rm_containers():
    """Remove containers by docker container-id prefixes (>= 7 chars each).

    Groups the matched containers by (version, host), creates one remove
    Task per group and schedules the removal jobs asynchronously.
    """
    prefixes = request.get_json()['cids']
    if any(len(prefix) < 7 for prefix in prefixes):
        abort(400, 'must given at least 7 chars for container_id')
    grouped = {}
    for prefix in prefixes:
        found = Container.get_by_container_id(prefix)
        if not found:
            continue
        grouped.setdefault((found.version, found.host), []).append(found)
    task_ids, watch_keys = [], []
    for (version, host), members in grouped.iteritems():
        ids = [m.id for m in members]
        task = Task.create(TASK_REMOVE, version, host, {'container_ids': ids})
        on_host = [c.id for c in Container.get_multi_by_host(host)
                   if c and c.version_id == version.id]
        # also delete the image when this removes every container of the
        # version on this host
        wipe_image = set(ids) == set(on_host)
        remove_containers.apply_async(args=(task.id, ids, wipe_image),
                                      task_id='task:%d' % task.id)
        task_ids.append(task.id)
        watch_keys.append(task.result_key)
    return {'tasks': task_ids, 'watch_keys': watch_keys}
def test_build_image(client, test_db):
    """Build an image through the deploy API and verify the created Task."""
    # Can't run locally anyway -_-!
    return
    app, version, pod, host = create_local_test_data()
    rv = client.post('/api/deploy/build/group/pod/blueberry',
                     data=json.dumps({
                         'base': 'containerops.cn/tonicbupt/ubuntu:python-2014.11.28',
                         'version': version.sha
                     }),
                     content_type='application/json')
    # FIX: the Flask test-client response attribute is `status_code`,
    # not `status_consts`.
    assert rv.status_code == 200
    r = json.loads(rv.data)
    assert r[u'r'] == 0
    task_id = r[u'task']
    assert task_id
    task = Task.get(task_id)
    assert task.host_id == host.id
    assert task.app_id == app.id
    assert task.version_id == version.id
    assert task.type == consts.TASK_BUILD
    assert task.props == {
        'base': 'containerops.cn/tonicbupt/ubuntu:python-2014.11.28'
    }
def test_build_image(client, test_db):
    """Build an image through the deploy API and verify the created Task."""
    # Can't run locally anyway -_-!
    return
    app, version, group, pod, host = create_local_test_data()
    rv = client.post(
        '/api/deploy/build/group/pod/blueberry',
        data=json.dumps({
            'base': 'containerops.cn/tonicbupt/ubuntu:python-2014.11.28',
            'version': version.sha
        }),
        content_type='application/json')
    # FIX: the Flask test-client response attribute is `status_code`,
    # not `status_consts`.
    assert rv.status_code == 200
    r = json.loads(rv.data)
    assert r[u'r'] == 0
    task_id = r[u'task']
    assert task_id
    task = Task.get(task_id)
    assert task.host_id == host.id
    assert task.app_id == app.id
    assert task.version_id == version.id
    assert task.type == consts.TASK_BUILD
    assert task.props == {
        'base': 'containerops.cn/tonicbupt/ubuntu:python-2014.11.28'
    }
def test_create_container(client, test_db):
    """Deploy one container via the public deploy API and verify the Task."""
    # Can't run locally anyway -_-!
    return
    app, version, pod, host = create_local_test_data()
    rv = client.post('/api/deploy/public/group/pod/blueberry',
                     data=json.dumps({
                         'ncontainer': 1,
                         'version': version.sha,
                         'entrypoint': 'web',
                         'env': 'prod'
                     }),
                     content_type='application/json')
    # FIX: the Flask test-client response attribute is `status_code`,
    # not `status_consts`.
    assert rv.status_code == 200
    r = json.loads(rv.data)
    assert len(r['tasks']) == 1
    task_id = r['tasks'][0]
    assert task_id
    task = Task.get(task_id)
    assert task.host_id == host.id
    assert task.app_id == app.id
    assert task.version_id == version.id
    assert task.type == consts.TASK_CREATE
    props = task.props
    assert props['ncontainer'] == 1
    assert props['entrypoint'] == 'web'
    assert props['cores'] == []
def _create_task(type_, version, host, ncontainer, cores, nshare, networks,
                 spec_ips, entrypoint, env, image=''):
    """Persist a Task row and fire the container-creation celery job.

    Returns the new Task, or None when the Task row cannot be created.
    """
    net_ids = [net.id for net in networks]
    props = {
        'ncontainer': ncontainer,
        'entrypoint': entrypoint,
        'env': env,
        'full_cores': [core.label for core in cores.get('full', [])],
        'part_cores': [core.label for core in cores.get('part', [])],
        'nshare': nshare,
        'networks': net_ids,
        'image': image,
    }
    task = Task.create(type_, version, host, props)
    if not task:
        return None
    try:
        create_containers_with_macvlan.apply_async(
            args=(task.id, ncontainer, nshare, cores, net_ids, spec_ips),
            task_id='task:%d' % task.id)
    except Exception as e:
        # scheduling failed: give the reserved cores back to the host
        logger.exception(e)
        host.release_cores(cores)
    return task
def build_image_v2():
    """Form-POST endpoint: build an app image, optionally with artifacts.zip."""
    appname = request.form.get('appname', default='')
    version = request.form.get('version', default='')
    base = request.form.get('base', default='')
    if not base:
        abort(400, 'base image must be set')
    _, version = _get_app_and_version(appname, version)
    if ':' not in base:
        base += ':latest'
    host = Host.get_random_public_host()
    if not host:
        abort(406, 'no host is available')
    # if no artifacts.zip is set, ignore it and just do the cloning and
    # building
    archive_path = None
    if 'artifacts.zip' in request.files:
        upload = request.files['artifacts.zip']
        archive_path = os.path.join(tempfile.mkdtemp(),
                                    secure_filename(upload.filename))
        upload.save(archive_path)
    task = Task.create(TASK_BUILD, version, host, {'base': base})
    build_docker_image.apply_async(args=(task.id, base, archive_path),
                                   task_id='task:%d' % task.id)
    return {'task': task.id, 'watch_key': task.result_key}
def build_docker_image(task_id, base, file_path):
    """Pull the base image, build and push the app image for a build Task.

    Args:
        task_id: id of the TASK_BUILD Task to run.
        base: base image name, 'repo:tag'.
        file_path: optional path to an artifacts archive fed into the build.
    """
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return
    _log.info('Task<id=%s>: Start on host %s', task_id, task.host.ip)
    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    try:
        repo, tag = base.split(':', 1)
        # images are namespaced under eru/
        repo = repo if repo.startswith('eru/') else 'eru/' + repo.strip('/')
        _log.info('Task<id=%s>: Pull base image (base=%s)', task_id, base)
        notifier.store_and_broadcast(dockerjob.pull_image(host, repo, tag))
        _log.info('Task<id=%s>: Build image (base=%s)', task_id, base)
        notifier.store_and_broadcast(
            dockerjob.build_image(host, version, base, file_path))
        _log.info('Task<id=%s>: Push image (base=%s)', task_id, base)
        # FIX: dropped unused locals `app` and `last_line`
        notifier.store_and_broadcast(dockerjob.push_image(host, version))
        # local copy no longer needed once pushed
        dockerjob.remove_image(version, host)
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only; `as e` matches the
        # rest of the file and is forward-compatible.
        task.finish(consts.TASK_FAILED)
        task.reason = str(e.message)
        notifier.pub_fail()
        _log.error('Task<id=%s>, exception', task_id)
        _log.exception(e)
def rm_containers():
    """Remove containers by docker container-id prefix (>= 7 chars each)."""
    id_prefixes = request.get_json()['cids']
    if any(len(p) < 7 for p in id_prefixes):
        abort(400, 'must given at least 7 chars for container_id')
    by_version_host = {}
    for p in id_prefixes:
        c = Container.get_by_container_id(p)
        if not c:
            continue
        by_version_host.setdefault((c.version, c.host), []).append(c)
    created, keys = [], []
    for (version, host), members in by_version_host.iteritems():
        ids = [c.id for c in members]
        task = Task.create(consts.TASK_REMOVE, version, host,
                           {'container_ids': ids})
        remaining = [c.id for c in Container.get_multi_by_host(host)
                     if c and c.version_id == version.id]
        # drop the image as well when nothing of this version remains on
        # the host afterwards
        delete_image = set(ids) == set(remaining)
        remove_containers.apply_async(args=(task.id, ids, delete_image),
                                      task_id='task:%d' % task.id)
        created.append(task.id)
        keys.append(task.result_key)
    return {'r': 0, 'msg': 'ok', 'tasks': created, 'watch_keys': keys}
def task_log(task_id):
    """Stream a task's logs over a websocket: stored logs first, then live
    pub/sub messages until the end marker arrives."""
    ws = request.environ['wsgi.websocket']
    task = Task.get(task_id)
    if not task:
        ws.close()
        _log.info('Task %s not found, close websocket' % task_id)
        return 'websocket closed'
    notifier = TaskNotifier(task)
    try:
        pub = rds.pubsub()
        pub.subscribe(task.publish_key)
        # replay the logs already stored in redis
        for line in notifier.get_store_logs():
            ws.send(line)
        if task.finished:
            return ''
        # then stream live messages until the end marker
        for line in pub.listen():
            if line['data'] == consts.PUB_END_MESSAGE:
                break
            if line['type'] != 'message':
                continue
            ws.send(line['data'])
    except geventwebsocket.WebSocketError as e:
        # FIX: `except X, e` is Python-2-only syntax; use `as e`
        _log.exception(e)
def remove_containers(task_id, cids, rmi=False):
    """Remove the given containers for Task `task_id`.

    Args:
        task_id: id of the TASK_REMOVE Task to run.
        cids: Container database ids to remove.
        rmi: when True, also remove the version's image from the host.
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return
    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    container_ids = [c.container_id for c in containers]
    host = task.host
    try:
        # flag the containers so the agent stops reporting them while we
        # tear them down
        flags = {'eru:agent:%s:container:flag' % cid: 1 for cid in container_ids}
        rds.mset(**flags)
        for c in containers:
            remove_container_backends(c)
            current_flask.logger.info('Task<id=%s>: Container (cid=%s) backends removed',
                                      task_id, c.container_id[:7])
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)
        dockerjob.remove_host_containers(containers, host)
        current_flask.logger.info('Task<id=%s>: Containers (cids=%s) removed',
                                  task_id, cids)
        if rmi:
            dockerjob.remove_image(task.version, host)
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        task.finish_with_result(consts.TASK_FAILED)
        notifier.pub_fail()
        current_flask.logger.error('Task<id=%s>: Exception (e=%s)', task_id, e)
def test_create_container(client, test_db):
    """Deploy one container via the public deploy API and verify the Task."""
    # Can't run locally anyway -_-!
    return
    app, version, group, pod, host = create_local_test_data()
    rv = client.post(
        '/api/deploy/public/group/pod/blueberry',
        data=json.dumps({
            'ncontainer': 1,
            'version': version.sha,
            'entrypoint': 'web',
            'env': 'prod'
        }),
        content_type='application/json')
    # FIX: the Flask test-client response attribute is `status_code`,
    # not `status_consts`.
    assert rv.status_code == 200
    r = json.loads(rv.data)
    assert len(r['tasks']) == 1
    task_id = r['tasks'][0]
    assert task_id
    task = Task.get(task_id)
    assert task.host_id == host.id
    assert task.app_id == app.id
    assert task.version_id == version.id
    assert task.type == consts.TASK_CREATE
    props = task.props
    assert props['ncontainer'] == 1
    assert props['entrypoint'] == 'web'
    assert props['cores'] == []
def task_log(task_id):
    """Stream a task's logs over a websocket: stored logs first, then live
    pub/sub messages until the end marker arrives."""
    ws = request.environ['wsgi.websocket']
    task = Task.get(task_id)
    if not task:
        ws.close()
        logger.info('Task %s not found, close websocket' % task_id)
        return 'websocket closed'
    notifier = TaskNotifier(task)
    try:
        pub = rds.pubsub()
        pub.subscribe(task.publish_key)
        # replay the logs already stored in redis
        for line in notifier.get_store_logs():
            ws.send(line)
        if task.finished:
            return ''
        # then stream live messages until the end marker
        for line in pub.listen():
            if line['data'] == code.PUB_END_MESSAGE:
                break
            if line['type'] != 'message':
                continue
            ws.send(line['data'])
    except geventwebsocket.WebSocketError as e:
        # FIX: `except X, e` is Python-2-only syntax; use `as e`
        logger.exception(e)
def _create_task(type_, version, host, ncontainer, cores, nshare, networks,
                 spec_ips, entrypoint, env, image=""):
    """Create a Task row and schedule the matching container-creation job.

    Returns the Task, or None when the row could not be created.
    """
    net_ids = [net.id for net in networks]
    full_labels = [core.label for core in cores.get("full", [])]
    part_labels = [core.label for core in cores.get("part", [])]
    task = Task.create(type_, version, host, {
        "ncontainer": ncontainer,
        "entrypoint": entrypoint,
        "env": env,
        "full_cores": full_labels,
        "part_cores": part_labels,
        "nshare": nshare,
        "networks": net_ids,
        "image": image,
    })
    if not task:
        return None
    try:
        create_containers_with_macvlan.apply_async(
            args=(task.id, ncontainer, nshare, cores, net_ids, spec_ips),
            task_id="task:%d" % task.id)
    except Exception as e:
        # could not schedule: return the reserved cores to the host
        logger.exception(e)
        host.release_cores(cores)
    return task
def build_docker_image(task_id, base, file_path):
    """Pull the base image, build and push the app image for a build Task.

    Args:
        task_id: id of the TASK_BUILD Task to run.
        base: base image name, 'repo:tag'.
        file_path: optional path to an artifacts archive fed into the build.
    """
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return
    _log.info('Task<id=%s>: Start on host %s', task_id, task.host.ip)
    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    try:
        repo, tag = base.split(':', 1)
        # images are namespaced under eru/
        repo = repo if repo.startswith('eru/') else 'eru/' + repo.strip('/')
        _log.info('Task<id=%s>: Pull base image (base=%s)', task_id, base)
        notifier.store_and_broadcast(dockerjob.pull_image(host, repo, tag))
        _log.info('Task<id=%s>: Build image (base=%s)', task_id, base)
        notifier.store_and_broadcast(dockerjob.build_image(host, version, base, file_path))
        _log.info('Task<id=%s>: Push image (base=%s)', task_id, base)
        # FIX: dropped unused locals `app` and `last_line`
        notifier.store_and_broadcast(dockerjob.push_image(host, version))
        # local copy no longer needed once pushed
        dockerjob.remove_image(version, host)
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        task.finish(consts.TASK_FAILED)
        task.reason = str(e.message)
        notifier.pub_fail()
        _log.error('Task<id=%s>, exception', task_id)
        _log.exception(e)
def remove_containers(task_id, cids, rmi=False):
    """Remove the given containers for Task `task_id`.

    Args:
        task_id: id of the TASK_REMOVE Task to run.
        cids: Container database ids to remove.
        rmi: when True, also try to remove the version's image (best effort).
    """
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return
    _log.info('Task<id=%s>: Start on host %s', task_id, task.host.ip)
    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    if not containers:
        # FIX: the %s placeholder had no argument; pass task_id
        _log.error('Task (id=%s) no container found, quit', task_id)
        return
    host = containers[0].host
    # mark containers as being removed so other code paths skip them
    for c in containers:
        c.in_removal = 1
    container_ids = [c.container_id for c in containers if c]
    try:
        set_flag_for_agent(container_ids)
        for c in containers:
            remove_container_backends(c)
            _log.info('Task<id=%s>: Container (cid=%s) backends removed',
                      task_id, c.short_id)
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)
        # give service discovery a moment to propagate before killing
        time.sleep(3)
        dockerjob.remove_host_containers(containers, host)
        _log.info('Task<id=%s>: Containers (cids=%s) removed', task_id, cids)
        if rmi:
            try:
                dockerjob.remove_image(task.version, host)
            except Exception as e:
                # FIX: format string had one placeholder but two args;
                # include the exception in the message
                _log.error('Task<id=%s>, fail to remove image (e=%s)', task_id, e)
    except Exception as e:
        task.finish(consts.TASK_FAILED)
        task.reason = str(e.message)
        notifier.pub_fail()
        _log.error('Task<id=%s> exception', task_id)
        _log.exception(e)
    else:
        for c in containers:
            c.delete()
        task.finish(consts.TASK_SUCCESS)
        task.reason = 'ok'
        notifier.pub_success()
        remove_container_for_agent(host, container_ids)
        remove_flag_for_agent(container_ids)
        _log.info('Task<id=%s>: Done', task_id)
def build_image(group_name, pod_name, appname):
    """Create a build Task for the given app version and kick off the build."""
    data = request.get_json()
    group, pod, application, version = validate_instance(
        group_name, pod_name, appname, data["version"])
    # TODO: may this group use this pod? may this group build this version?
    base = data["base"]
    host = pod.get_random_host()
    build_task = Task.create(consts.TASK_BUILD, version, host, {"base": base})
    build_docker_image.apply_async(
        args=(build_task.id, base),
        task_id="task:%d" % build_task.id)
    return {"r": 0, "msg": "ok", "task": build_task.id,
            "watch_key": build_task.result_key}
def build_image(group_name, pod_name, appname):
    """Schedule an image build for the requested app version."""
    data = request.get_json()
    group, pod, _, version = validate_instance(
        group_name, pod_name, appname, data['version'])
    base = data['base']
    if ':' not in base:
        base += ':latest'
    # prefer a public build host, fall back to any host in the pod
    host = Host.get_random_public_host() or pod.get_random_host()
    task = Task.create(consts.TASK_BUILD, version, host, {'base': base})
    build_docker_image.apply_async(args=(task.id, base),
                                   task_id='task:%d' % task.id)
    return {'r': 0, 'msg': 'ok', 'task': task.id, 'watch_key': task.result_key}
def offline_version(group_name, pod_name, appname):
    """Take a whole version offline: remove all its containers host by host."""
    data = request.get_json()
    group, pod, application, version = validate_instance(
        group_name, pod_name, appname, data["version"])
    per_host = {}
    for container in version.containers.all():
        per_host.setdefault(container.host, []).append(container)
    ts, keys = [], []
    for host, members in per_host.iteritems():
        ids = [m.id for m in members]
        task = Task.create(consts.TASK_REMOVE, version, host,
                           {"container_ids": ids})
        # True: the version goes away entirely, remove its image as well
        remove_containers.apply_async(args=(task.id, ids, True),
                                      task_id="task:%d" % task.id)
        ts.append(task.id)
        keys.append(task.result_key)
    return {"r": 0, "msg": "ok", "tasks": ts, "watch_keys": keys}
def build_image(group_name, pod_name, appname):
    """Create a build Task on a random pod host and schedule the image build."""
    payload = request.get_json()
    group, pod, application, version = validate_instance(
        group_name, pod_name, appname, payload['version'])
    # TODO: may this group use this pod? may this group build this version?
    base = payload['base']
    host = pod.get_random_host()
    task = Task.create(consts.TASK_BUILD, version, host, {'base': base})
    build_docker_image.apply_async(args=(task.id, base),
                                   task_id='task:%d' % task.id)
    return {'r': 0, 'msg': 'ok', 'task': task.id, 'watch_key': task.result_key}
def remove_containers(task_id, cids, rmi=False):
    """Remove the given containers for Task `task_id`.

    cids: Container database ids to remove.
    rmi: when True, also try to remove the version's image (best effort).
    """
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return
    current_flask.logger.info('Task<id=%s>: Start on host %s' % (task_id, task.host.ip))
    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    container_ids = [c.container_id for c in containers if c]
    host = task.host
    version = task.version
    try:
        # flag, don't report these
        # NOTE(review): rds.mset raises on an empty mapping — assumes cids
        # resolve to at least one container; confirm with callers.
        flags = {'eru:agent:%s:container:flag' % cid: 1 for cid in container_ids}
        rds.mset(**flags)
        for c in containers:
            remove_container_backends(c)
            current_flask.logger.info('Task<id=%s>: Container (cid=%s) backends removed', task_id, c.container_id[:7])
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)
        # presumably waiting for service discovery to propagate — confirm
        time.sleep(3)
        dockerjob.remove_host_containers(containers, host)
        current_flask.logger.info('Task<id=%s>: Containers (cids=%s) removed', task_id, cids)
        if rmi:
            try:
                # best effort: image removal failure does not fail the task
                dockerjob.remove_image(task.version, host)
            except Exception as e:
                current_flask.logger.error('Task<id=%s>: Exception (e=%s), fail to remove image', task_id, e)
    except Exception as e:
        task.finish_with_result(consts.TASK_FAILED)
        notifier.pub_fail()
        current_flask.logger.error('Task<id=%s>: Exception (e=%s)', task_id, e)
    else:
        # everything removed cleanly: drop db rows and agent bookkeeping
        for c in containers:
            c.delete()
        task.finish_with_result(consts.TASK_SUCCESS)
        notifier.pub_success()
        if container_ids:
            rds.hdel('eru:agent:%s:containers:meta' % host.name, *container_ids)
            rds.delete(*flags.keys())
        current_flask.logger.info('Task<id=%s>: Done', task_id)
    if not version.containers.count():
        # last container of this version is gone: drop its monitoring alarms
        falcon_remove_alarms(version)
def build_docker_image(task_id, base):
    """Pull the base, build and push the version's image, then clean up.

    Args:
        task_id: id of the build Task to run.
        base: base image name, 'repo:tag'.
    """
    task = Task.get(task_id)
    notifier = TaskNotifier(task)
    try:
        repo, tag = base.split(':', 1)
        notifier.store_and_broadcast(dockerjob.pull_image(task.host, repo, tag))
        notifier.store_and_broadcast(dockerjob.build_image(task.host, task.version, base))
        notifier.store_and_broadcast(dockerjob.push_image(task.host, task.version))
        try:
            # best-effort local cleanup; failure to remove the image is
            # deliberately ignored
            dockerjob.remove_image(task.version, task.host)
        except Exception:
            # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
            # narrow to Exception while keeping best-effort semantics
            pass
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        logger.exception(e)
        task.finish_with_result(code.TASK_FAILED)
        notifier.pub_fail()
def rm_containers():
    """Remove the given containers, batching one remove Task per
    (version, host) pair."""
    raw_ids = request.get_json()["cids"]
    buckets = {}
    ts, watch_keys = [], []
    for raw in raw_ids:
        container = Container.get_by_container_id(raw)
        if not container:
            continue
        buckets.setdefault((container.version, container.host), []).append(container)
    for (version, host), bucket in buckets.iteritems():
        ids = [c.id for c in bucket]
        task = Task.create(consts.TASK_REMOVE, version, host,
                           {"container_ids": ids})
        remove_containers.apply_async(args=(task.id, ids, False),
                                      task_id="task:%d" % task.id)
        ts.append(task.id)
        watch_keys.append(task.result_key)
    return {"r": 0, "msg": "ok", "tasks": ts, "watch_keys": watch_keys}
def _create_task(version, host, ncontainer, cores, nshare, networks, ports, args, spec_ips, route, entrypoint, env, image=''): network_ids = [n.id for n in networks] # host 模式不允许绑定 vlan entry = version.appconfig['entrypoints'][entrypoint] if entry.get('network_mode') == 'host': network_ids = [] task_props = { 'ncontainer': ncontainer, 'entrypoint': entrypoint, 'env': env, 'full_cores': [c.label for c in cores.get('full', [])], 'part_cores': [c.label for c in cores.get('part', [])], 'ports': ports, 'args': args, 'nshare': nshare, 'networks': network_ids, 'image': image, 'route': route, } task = Task.create(consts.TASK_CREATE, version, host, task_props) if not task: return None if cores: try: create_containers_with_macvlan.apply_async( args=(task.id, ncontainer, nshare, cores, network_ids, spec_ips), task_id='task:%d' % task.id ) except Exception as e: logger.exception(e) host.release_cores(cores) else: try: create_containers_with_macvlan_public.apply_async( args=(task.id, ncontainer, nshare, network_ids, spec_ips), task_id='task:%d' % task.id ) except Exception as e: logger.exception(e) return task
def build_image():
    """JSON API: schedule an image build on a random public host."""
    payload = request.get_json()
    _, version = _get_app_and_version(**payload)
    base = payload['base']
    if ':' not in base:
        base += ':latest'
    host = Host.get_random_public_host()
    if not host:
        abort(406, 'no host is available')
    task = Task.create(TASK_BUILD, version, host, {'base': base})
    build_docker_image.apply_async(args=(task.id, base, None),
                                   task_id='task:%d' % task.id)
    return {'task': task.id, 'watch_key': task.result_key}
def migrate_container(container_id, need_to_remove=True):
    """Migrate one container to a freshly scheduled host.

    Creates a TASK_MIGRATE Task, then (when `need_to_remove`) removes the
    old container before creating the replacement. Both steps run
    synchronously via celery .apply().
    """
    container = Container.get_by_container_id(container_id)
    if not container:
        _log.error('container %s is not found, ignore migration', container_id)
        return
    # translate the container's core count into (full cores, shared slices)
    ncore, nshare = container.host.pod.get_core_allocation(container.ncore)
    host_cores = average_schedule(container.host.pod, 1, ncore, nshare, None)
    if not host_cores:
        _log.error('not enough cores to migrate')
        return
    cids = [container.id]
    # keep the same IPs on the replacement container
    spec_ips = cidrs = container.get_ips()
    (host, container_count), cores = next(host_cores.iteritems())
    props = {
        'ncontainer': 1,
        'entrypoint': container.entrypoint,
        'env': container.env,
        'full_cores': [c.label for c in cores.get('full', [])],
        'part_cores': [c.label for c in cores.get('part', [])],
        'ports': None,
        'args': None,
        'nshare': nshare,
        'networks': cidrs,
        'image': None,
        'route': '',
        'callback_url': container.callback_url,
        'container_ids': cids,
    }
    task = Task.create(consts.TASK_MIGRATE, container.version, host, props)
    if not task:
        _log.error('create migrate task error')
        return
    _log.info('start migration...')
    # .apply (not apply_async) so removal finishes before creation starts
    if need_to_remove:
        remove_containers.apply(args=(task.id, cids, False), task_id='task:%s' % task.id)
    create_containers.apply(args=(task.id, 1, nshare, cores, cidrs, spec_ips), task_id='task:%s' % task.id)
    _log.info('migration done')
def offline_version(group_name, pod_name, appname):
    """Remove every container of a version, one remove Task per host."""
    data = request.get_json()
    group, pod, application, version = validate_instance(
        group_name, pod_name, appname, data['version'])
    by_host = {}
    for c in version.containers.all():
        by_host.setdefault(c.host, []).append(c)
    task_ids, result_keys = [], []
    for host, members in by_host.iteritems():
        ids = [m.id for m in members]
        task = Task.create(consts.TASK_REMOVE, version, host,
                           {'container_ids': ids})
        # True: the version is going away entirely, remove its image too
        remove_containers.apply_async(args=(task.id, ids, True),
                                      task_id='task:%d' % task.id)
        task_ids.append(task.id)
        result_keys.append(task.result_key)
    return {'r': 0, 'msg': 'ok', 'tasks': task_ids, 'watch_keys': result_keys}
def offline_version():
    """Take a version offline by removing all of its containers."""
    data = request.get_json()
    pod, _, version = _get_instances(**data)
    host_map = {}
    for c in version.containers.all():
        host_map.setdefault(c.host, []).append(c)
    tasks, keys = [], []
    for host, members in host_map.iteritems():
        ids = [m.id for m in members]
        task = Task.create(TASK_REMOVE, version, host, {'container_ids': ids})
        # True: the version is going away entirely, remove its image too
        remove_containers.apply_async(args=(task.id, ids, True),
                                      task_id='task:%d' % task.id)
        tasks.append(task.id)
        keys.append(task.result_key)
    return {'tasks': tasks, 'watch_keys': keys}
def rm_containers(group_name, pod_name, appname):
    """Remove `ncontainer` containers of a version from a named host."""
    data = request.get_json()
    group, pod, application, version = validate_instance(
        group_name, pod_name, appname, data['version'])
    host = Host.get_by_name(data['host'])
    # just take the first ncontainer: they are all equivalent anyway
    containers = host.get_containers_by_version(version)[:int(data['ncontainer'])]
    try:
        cids = [c.id for c in containers]
        task_props = {'container_ids': cids}
        task = Task.create(code.TASK_REMOVE, version, host, task_props)
        remove_containers.apply_async(
            args=(task.id, cids, False),
            task_id='task:%d' % task.id
        )
        return {'r': 0, 'msg': 'ok', 'task': task.id, 'watch_key': task.result_key}
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        logger.exception(e)
        return {'r': 1, 'msg': str(e), 'task': None, 'watch_key': None}
def rm_containers():
    """Remove containers by container id, grouped per (version, host)."""
    requested = request.get_json()['cids']
    groups = {}
    created_tasks, watch_keys = [], []
    for cid in requested:
        c = Container.get_by_container_id(cid)
        if not c:
            continue
        groups.setdefault((c.version, c.host), []).append(c)
    for (version, host), members in groups.iteritems():
        ids = [m.id for m in members]
        task = Task.create(consts.TASK_REMOVE, version, host,
                           {'container_ids': ids})
        remove_containers.apply_async(args=(task.id, ids, False),
                                      task_id='task:%d' % task.id)
        created_tasks.append(task.id)
        watch_keys.append(task.result_key)
    return {'r': 0, 'msg': 'ok', 'tasks': created_tasks,
            'watch_keys': watch_keys}
def clean_app(app_name): app = App.get_by_name(app_name) if not app: print 'app %s not found' % app_name return containers = app.list_containers(limit=None) version_dict = {} for c in containers: if not c: continue version_dict.setdefault((c.version, c.host), []).append(c) for (version, host), cs in version_dict.iteritems(): cids = [c.id for c in cs] task_props = {'container_ids': cids} task = Task.create(TASK_REMOVE, version, host, task_props) remove_containers.apply_async(args=(task.id, cids, False), task_id='task:%s' % task.id) print task print 'done, waiting...'
def _create_task(type_, version, host, ncontainer, cores, networks,
                 entrypoint, env):
    """Create a Task and schedule container creation.

    Returns the Task, or None (implicitly) when anything raised; on failure
    the reserved cores are returned to the host.
    """
    try:
        core_ids = [c.id for c in cores]
        network_ids = [n.id for n in networks]
        task_props = {
            'ncontainer': ncontainer,
            'entrypoint': entrypoint,
            'env': env,
            'cores': core_ids,
            'networks': network_ids,
        }
        task = Task.create(type_, version, host, task_props)
        create_containers_with_macvlan.apply_async(
            args=(task.id, ncontainer, core_ids, network_ids),
            task_id='task:%d' % task.id
        )
        return task
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        logger.exception(e)
        host.release_cores(cores)
def migrate_container(container_id, need_to_remove=True):
    """Migrate one container to a freshly scheduled host.

    Creates a TASK_MIGRATE Task, then (when `need_to_remove`) removes the
    old container before creating the replacement. Both steps run
    synchronously via celery .apply().
    """
    container = Container.get_by_container_id(container_id)
    if not container:
        _log.error('container %s is not found, ignore migration', container_id)
        return
    # translate the container's core count into (full cores, shared slices)
    ncore, nshare = container.host.pod.get_core_allocation(container.ncore)
    host_cores = average_schedule(container.host.pod, 1, ncore, nshare, None)
    if not host_cores:
        _log.error('not enough cores to migrate')
        return
    cids = [container.id]
    # keep the same IPs on the replacement container
    spec_ips = cidrs = container.get_ips()
    (host, container_count), cores = next(host_cores.iteritems())
    props = {
        'ncontainer': 1,
        'entrypoint': container.entrypoint,
        'env': container.env,
        'full_cores': [c.label for c in cores.get('full', [])],
        'part_cores': [c.label for c in cores.get('part', [])],
        'ports': None,
        'args': None,
        'nshare': nshare,
        'networks': cidrs,
        'image': None,
        'route': '',
        'callback_url': container.callback_url,
        'container_ids': cids,
    }
    task = Task.create(consts.TASK_MIGRATE, container.version, host, props)
    if not task:
        _log.error('create migrate task error')
        return
    _log.info('start migration...')
    # .apply (not apply_async) so removal finishes before creation starts
    if need_to_remove:
        remove_containers.apply(args=(task.id, cids, False), task_id='task:%s' % task.id)
    create_containers.apply(args=(task.id, 1, nshare, cores, cidrs, spec_ips), task_id='task:%s' % task.id)
    _log.info('migration done')
def remove_containers(task_id, cids, rmi):
    """Remove containers for a remove Task; optionally remove the image too.

    Args:
        task_id: id of the Task to run.
        cids: Container database ids.
        rmi: when True, also remove the version's image from the host.
    """
    task = Task.get(task_id)
    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    container_ids = [c.container_id for c in containers]
    host = task.host
    try:
        # flag the containers so the agent does not report them during removal
        flags = {'eru:agent:%s:container:flag' % cid: 1 for cid in container_ids}
        rds.mset(**flags)
        for c in containers:
            remove_container_backends(c)
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)
        dockerjob.remove_host_containers(containers, host)
        if rmi:
            dockerjob.remove_image(task.version, host)
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        logger.exception(e)
        task.finish_with_result(code.TASK_FAILED)
        notifier.pub_fail()
def clean_app(app_name): app = App.get_by_name(app_name) if not app: print 'app %s not found' % app_name return containers = app.list_containers(limit=None) version_dict = {} for c in containers: if not c: continue version_dict.setdefault((c.version, c.host), []).append(c) for (version, host), cs in version_dict.iteritems(): cids = [c.id for c in cs] task_props = {'container_ids': cids} task = Task.create(TASK_REMOVE, version, host, task_props) remove_containers.apply_async( args=(task.id, cids, False), task_id='task:%s' % task.id ) print task print 'done, waiting...'
def build_docker_image(task_id, base):
    """Pull the base image, build the version's image, push it, then clean up.

    Args:
        task_id: id of the build Task to run.
        base: base image name, 'repo:tag'.
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return
    notifier = TaskNotifier(task)
    try:
        repo, tag = base.split(':', 1)
        current_flask.logger.info('Task<id=%s>: Pull base image (base=%s)', task_id, base)
        notifier.store_and_broadcast(dockerjob.pull_image(task.host, repo, tag))
        current_flask.logger.info('Task<id=%s>: Build image (base=%s)', task_id, base)
        notifier.store_and_broadcast(dockerjob.build_image(task.host, task.version, base))
        current_flask.logger.info('Task<id=%s>: Push image (base=%s)', task_id, base)
        notifier.store_and_broadcast(dockerjob.push_image(task.host, task.version))
        # local copy no longer needed once pushed
        dockerjob.remove_image(task.version, task.host)
    except Exception as e:
        # FIX: `except Exception, e` is Python-2-only syntax; use `as e`
        task.finish_with_result(consts.TASK_FAILED)
        notifier.pub_fail()
        current_flask.logger.error('Task<id=%s>: Exception (e=%s)', task_id, e)
def fillup_data():
    """Dev helper: rebuild the DB and seed a group/pod/host/app/version,
    then allocate cores/ports and create one sample Task.

    Talks to a local boot2docker daemon at 192.168.59.103:2375.
    """
    app, _ = create_app_with_celery()
    with app.app_context():
        # start from a clean schema
        db.drop_all()
        db.create_all()
        group = Group.create('test-group', 'test-group')
        pod = Pod.create('test-pod', 'test-pod')
        pod.assigned_to_group(group)
        # probe the docker daemon for host facts (name, id, cpu, memory)
        r = requests.get('http://192.168.59.103:2375/info').json()
        host = Host.create(pod, '192.168.59.103:2375', r['Name'], r['ID'],
                           r['NCPU'], r['MemTotal'])
        host.assigned_to_group(group)
        app = App.get_or_create('nbetest', 'http://git.hunantv.com/platform/nbetest.git', 'token')
        app.add_version('96cbf8c68ed214f105d9f79fa4f22f0e80e75cf3')
        app.assigned_to_group(group)
        version = app.get_version('96cbf8')
        host_cores = group.get_free_cores(pod, 2, 2)
        cores = []
        for (host, cn), coress in host_cores.iteritems():
            print host, cn, coress
            cores = coress
        print cores
        ports = host.get_free_ports(2)
        print ports
        props = {
            'entrypoint': 'web',
            'ncontainer': 2,
            'env': 'PROD',
            'cores': [c.id for c in cores],
            'ports': [p.id for p in ports],
        }
        # 1 presumably means TASK_CREATE — confirm against the consts module
        task = Task.create(1, version, host, props)
        print task.props
        # mark the allocated resources as used
        host.occupy_cores(cores)
        host.occupy_ports(ports)
        print group.get_free_cores(pod, 1, 1)
def _create_task(version, host, ncontainer, cores, nshare, networks, ports,
                 args, spec_ips, entrypoint, env, image='', callback_url=''):
    """Create a TASK_CREATE Task and schedule container creation.

    Returns the Task, or None when the Task row cannot be created.
    """
    entry = version.appconfig['entrypoints'][entrypoint]
    # host network mode cannot bind vlan, so no cidrs in that case
    cidrs = [] if entry.get('network_mode') == 'host' else [n.cidr for n in networks]
    task = Task.create(TASK_CREATE, version, host, {
        'ncontainer': ncontainer,
        'entrypoint': entrypoint,
        'env': env,
        'full_cores': [c.label for c in cores.get('full', [])],
        'part_cores': [c.label for c in cores.get('part', [])],
        'ports': ports,
        'args': args,
        'nshare': nshare,
        'networks': cidrs,
        'image': image,
        'route': entry.get('network_route', ''),
        'callback_url': callback_url,
    })
    if not task:
        return None
    try:
        create_containers.apply_async(
            args=(task.id, ncontainer, nshare, cores, cidrs, spec_ips),
            task_id='task:%d' % task.id)
    except Exception as e:
        # scheduling failed: give the reserved cores back to the host
        _log.exception(e)
        host.release_cores(cores)
    return task
def create_containers(task_id, ncontainer, nshare, cores, network_ids, spec_ips=None):
    """Run the task `task_id`: deploy `ncontainer` containers, occupying the
    given cores and binding them to the given subnets."""
    _log.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return
    if spec_ips is None:
        spec_ips = []
    need_network = bool(network_ids)
    networks = [ipam.get_pool(n) for n in network_ids]
    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    ports = task.props['ports']
    args = task.props['args']
    # use raw
    image = task.props['image']
    callback_url = task.props['callback_url']
    # scale the shared-core slices into docker cpu_shares (1024 = one core)
    cpu_shares = int(float(nshare) / host.pod.core_share * 1024) if nshare else 1024
    cids = []
    backends = []
    entry = version.appconfig.entrypoints[entrypoint]
    for fcores, pcores in _iter_cores(cores, ncontainer):
        cores_for_one_container = {'full': fcores, 'part': pcores}
        # create the container on the host
        try:
            cid, cname = dockerjob.create_one_container(
                host, version, entrypoint, env, fcores + pcores,
                ports=ports, args=args, cpu_shares=cpu_shares, image=image,
                need_network=need_network)
        except Exception as e:
            # written for the celery log
            _log.exception(e)
            host.release_cores(cores_for_one_container, nshare)
            continue
        # record the container
        c = Container.create(cid, host, version, cname, entrypoint,
                             cores_for_one_container, env, nshare, callback_url)
        # create the network stack for the container and record everything;
        # on failure, clean all records and the container on the host,
        # then loop for the next attempt
        cidrs = [n.netspace for n in networks]
        if not ipam.allocate_ips(cidrs, cid, spec_ips):
            _clean_failed_containers(cid)
            continue
        notifier.notify_agent(c)
        add_container_for_agent(host, c)
        add_container_backends(c)
        cids.append(cid)
        backends.extend(c.get_backends())
        c.callback_report(status='start')
    health_check = entry.get('health_check', '')
    if health_check and backends:
        # wait for every backend to answer its health-check URL
        urls = [b + health_check for b in backends]
        if not wait_health_check(urls):
            # TODO: either roll back or raise an alarm here
            _log.info('Task<id=%s>: Done, but something went error', task_id)
            return
    publish_to_service_discovery(version.name)
    task.finish(consts.TASK_SUCCESS)
    task.reason = 'ok'
    task.container_ids = cids
    notifier.pub_success()
    _log.info('Task<id=%s>: Done', task_id)
def task_log(task_id):
    """Return a task's stored log lines, each parsed from JSON."""
    task = Task.get(task_id)
    if not task:
        abort(404, 'Task %s not found' % task_id)
    raw_lines = rds.lrange(task.log_key, 0, -1)
    return [json.loads(raw) for raw in raw_lines]
def get_task(task_id):
    """Look up a Task by id, aborting with 404 when it does not exist."""
    found = Task.get(task_id)
    if not found:
        abort(404, 'Task %s not found' % task_id)
    return found