Пример #1
0
def process(job):
    """Run one job in its containers and collect the output.

    Steps, in order: ``util.descriptor_correct(job)``, create the work
    directories, fetch the input files, create the containers while holding
    the ``config.LOCK_FILE`` lock, poll ``harbor.is_running`` every
    ``config.CONTAINER_CHECK_INTERVAL`` seconds until the main container
    stops, then hand the output directory to ``logic.write_std_output`` and
    ``logic.handle_output``.

    On success ``job.status`` is set to ``Job.COMPLETED``.

    Args:
        job: Job object; its ``status`` attribute is updated in place.

    Raises:
        Exception: Any error from the steps above is reported through
            ``capture_exception``, printed, and re-raised unchanged.
    """
    util.descriptor_correct(job)

    job_dir, in_dir, out_dir = logic.create_workdir(job)

    mounted_ids = []
    container_id = None
    try:
        logic.get_input_files(job, in_dir)

        # Containers are only created while the lock file is held.
        with LockFile(config.LOCK_FILE):
            mounted_ids, container_id = logic.create_containers(
                job, in_dir, out_dir)

        while harbor.is_running(container_id):
            logger.debug("Container is running. Sleeping for {} sec.".format(
                config.CONTAINER_CHECK_INTERVAL))
            time.sleep(config.CONTAINER_CHECK_INTERVAL)

        logic.write_std_output(container_id, out_dir)
        logic.handle_output(job, out_dir)
        logger.debug("Setting job.status='completed'")
        job.status = Job.COMPLETED
    except Exception:
        capture_exception()
        traceback.print_exc()
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        try:
            logic.cleanup_dir(job_dir)
        finally:
            # Runs even when directory cleanup fails, so containers are
            # never left behind because of a filesystem error.
            # Copy the list instead of extending ``mounted_ids`` in place.
            cnt_to_remove = list(mounted_ids)
            if container_id:
                cnt_to_remove.append(container_id)

            logic.cleanup_containers(cnt_to_remove)
Пример #2
0
def _remove_created_containers(container_ids):
    """Best-effort removal of containers created by a failed setup."""
    for c_id in container_ids:
        try:
            harbor.remove(c_id, v=True, force=True)
        except Exception as cleanup_error:
            # Keep going: the remaining containers still need removing.
            logger.error("Failed to remove container id={}: {}".format(
                c_id, cleanup_error))


def create_containers(job, in_dir, out_dir):
    """Create the job's needed containers and start its main container.

    The descriptor is read from ``job.input`` (JSON). Every entry of
    ``descriptor['container']['needed_containers']`` is created (not started
    here) under the name ``JOB-<job.id>-CNT-<index>``. The main container is
    then created from ``descriptor['container']['name']`` and started with
    ``volumes_from`` set to those names.

    Args:
        job: Job object providing ``input`` (JSON descriptor) and ``id``.
        in_dir: Input directory passed to ``util.obtain_volumes``.
        out_dir: Output directory passed to ``util.obtain_volumes``.

    Returns:
        tuple: ``(mounted_ids, main_id)`` - ids of the needed containers and
        the id of the main container.

    Raises:
        TypeError: If a needed container's ``volumes`` is not a list.
        Exception: Any other error is re-raised after every container
            created so far has been removed.
    """
    # Add needed containers
    logger.debug("Creating containers")
    descriptor = json.loads(job.input)
    container_spec = descriptor['container']

    created_ids = []  # everything created here, for rollback on failure
    mounted_ids = []
    mounted_names = []
    try:
        needed = container_spec.get('needed_containers', [])
        for i, container in enumerate(needed):
            image, volumes = container['name'], container['volumes']
            # Explicit check: an ``assert`` would be stripped under ``-O``.
            if not isinstance(volumes, list):
                raise TypeError(
                    "volumes of needed container {} must be a list".format(
                        image))

            if not config.ONLY_LOCAL_IMAGES:
                harbor.pull_image(image)

            tag = "JOB-{}-CNT-{}".format(job.id, i)

            c_id = harbor.create_container(
                image,
                volumes=volumes,
                detach=True,
                name=tag,
                mem_limit="{}m".format(descriptor['max_memoryMB']),
            )
            created_ids.append(c_id)
            mounted_names.append(tag)
            mounted_ids.append(c_id)

        # Execute environment container
        if not config.ONLY_LOCAL_IMAGES:
            harbor.pull_image(container_spec['name'])

        command = util.build_command(job)
        logger.debug('Command to execute: {}'.format(command))

        entrypoint = container_spec.get('entrypoint', '')
        needed_volumes = container_spec.get('volumes', [])
        volumes_list = util.obtain_volumes(in_dir, out_dir, needed_volumes)

        main_id = harbor.create_container(
            container_spec['name'],
            working_dir=container_spec['workdir'],
            command=command,
            entrypoint=entrypoint,
            volumes=volumes_list,
            detach=True,
        )
        created_ids.append(main_id)

        harbor.start_container(
            main_id,
            volumes_from=mounted_names,
            binds=volumes_list
        )
    except Exception:
        # Nothing is returned on failure, so the caller could never remove
        # these containers; roll them back here. The helper runs in its own
        # frame so the bare ``raise`` still re-raises the original error.
        _remove_created_containers(created_ids)
        raise

    return mounted_ids, main_id
Пример #3
0
def REMOVE_ALL_CONTAINERS():
    """Force-remove every container listed by ``client``. Use with caution.

    Containers are listed with ``all=True``. Each one gets up to 20 removal
    attempts; every failed attempt is reported through ``capture_exception``.
    A container that still cannot be removed after that is skipped.
    """
    logger.debug("Killing and removing all containers!")
    all_ids = [c['Id'] for c in client.containers(all=True)]
    for container_id in all_ids:
        # ``range`` works on both Python 2 and 3; 20 items is negligible.
        for _ in range(20):
            try:
                client.remove_container(container_id, force=True)
                break
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt / SystemExit must
                # be able to stop this loop instead of being retried.
                capture_exception()
Пример #4
0
def do_docker_job(job, stub):
    """Run ``process(job)`` and publish status changes through ``stub``.

    The job is marked ``Job.RUNNING`` and sent with ``stub.ModifyJob`` before
    processing, then marked ``Job.COMPLETED`` and sent again afterwards.

    Args:
        job: Job object; ``status`` is updated in place.
        stub: Client object exposing ``ModifyJob(job)``.

    Raises:
        BaseException: Any error (including ``KeyboardInterrupt`` and
            ``SystemExit``) is logged and re-raised. Unless the job was
            already completed its status is set to ``Job.FAILED`` first.
    """
    logger.debug("Got descriptor: {}".format(job.input))
    try:
        job.status = Job.RUNNING
        stub.ModifyJob(job)

        process(job)

        job.status = Job.COMPLETED
        stub.ModifyJob(job)

        logger.debug("Finished")
    except BaseException as e:
        capture_exception()
        if job.status != Job.COMPLETED:
            # NOTE(review): the FAILED status is only set locally here; it is
            # not sent through ``stub`` - confirm the caller reports it.
            job.status = Job.FAILED

        # Format once and reuse for both the debug dump and the error log.
        error_text = str(e)
        trace_text = traceback.format_exc()

        if config.DEBUG:
            logger.debug({
                "hostname": socket.gethostname(),
                "exception": error_text,
                "traceback": trace_text
            })

        logger.error(error_text)
        logger.error(trace_text)
        # Bare raise keeps the original traceback intact.
        raise
Пример #5
0
def do_docker_job(job, completion_event):
    """Run ``process(job)``, report status changes and signal completion.

    A fresh client from ``new_client()`` is used for every ``ModifyJob`` call
    (each with ``timeout=5``): once for ``Job.RUNNING``, once for
    ``Job.COMPLETED``, and once more on failure. On failure ``job.metadata``
    is set to a JSON object with the hostname, exception text and traceback.

    Args:
        job: Job object; ``status`` and ``metadata`` are updated in place.
        completion_event: Object with a ``set()`` method (presumably a
            ``threading.Event`` - confirm against the caller). It is always
            set before this function returns or raises.

    Raises:
        BaseException: Any error is logged, reported and re-raised.
    """
    logger.debug("Got descriptor: {}".format(job.input))
    try:
        job.status = Job.RUNNING

        stub = new_client()
        stub.ModifyJob(job, timeout=5)
        # Do not keep the client around while the job is processed.
        del stub

        process(job)

        stub = new_client()
        job.status = Job.COMPLETED
        stub.ModifyJob(job, timeout=5)

        logger.debug("Finished")
    except BaseException as e:
        capture_exception()
        if job.status != Job.COMPLETED:
            job.status = Job.FAILED

        error_text = str(e)
        trace_text = traceback.format_exc()

        # Log first, so the failure is recorded even if reporting it raises.
        logger.error(error_text)
        logger.error(trace_text)

        job.metadata = json.dumps({
            "hostname": socket.gethostname(),
            "exception": error_text,
            "traceback": trace_text
        })

        stub = new_client()
        stub.ModifyJob(job, timeout=5)

        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Always signal completion: previously a failing ModifyJob in the
        # error path skipped this and left any waiter blocked.
        completion_event.set()
Пример #6
0
def start_container(container_id, **kwargs):
    """Start a container, retrying up to ``config.DOCKER_START_ATTEMPTS`` times.

    Args:
        container_id: Id of the container to start.
        **kwargs: Passed through unchanged to ``client.start``.

    Returns:
        bool: ``True`` once the container has been started.

    Raises:
        Exception: If every attempt failed (or no attempt is allowed).
    """
    failures = 0
    while failures < config.DOCKER_START_ATTEMPTS:
        logger.debug("Trying to start container id={}".format(container_id))
        try:
            client.start(container_id, **kwargs)
        except Exception as err:
            capture_exception()
            logger.debug("Failed to start container id={}, error: {}".format(
                container_id, err))
            failures += 1
            continue

        # Reaching this point means the start call went through.
        logger.debug("Started container id={}".format(container_id))
        return True

    raise Exception('Failed to start container id={}'.format(container_id))
Пример #7
0
def get_input_files(job, in_dir):
    """Copy every input listed in the job descriptor into ``in_dir``.

    Args:
        job: Job object whose ``input`` attribute holds the JSON descriptor.
        in_dir: Destination passed to ``config.backend.copy_from_backend``.
    """
    input_files = json.loads(job.input)['input']
    for source in input_files:
        message = "Download input {}".format(source)
        logger.debug(message)
        config.backend.copy_from_backend(source, in_dir)
Пример #8
0
def cleanup_dir(job_dir):
    """Run the pre-remove hook, then delete the ``job_dir`` tree.

    Args:
        job_dir: Path of the directory to remove recursively.

    Raises:
        Whatever ``shutil.rmtree`` raises; errors are not ignored.
    """
    logger.debug("Cleaning up directories")
    # The hook runs before anything is deleted.
    pre_remove_hook()
    shutil.rmtree(job_dir)
Пример #9
0
def cleanup_containers(cnt_ids):
    """Remove the given containers via ``harbor.remove(..., v=True, force=True)``.

    Every container is attempted even when an earlier removal fails, so one
    bad id cannot leave the remaining containers behind.

    Args:
        cnt_ids: Iterable of container ids to remove.

    Raises:
        Exception: The first removal error, re-raised after all containers
            have been attempted.
    """
    logger.debug("Cleaning up containers")
    first_error = None
    for container_id in cnt_ids:
        try:
            harbor.remove(container_id, v=True, force=True)
        except Exception as error:
            logger.error("Failed to remove container id={}: {}".format(
                container_id, error))
            if first_error is None:
                first_error = error

    # Still surface the failure to the caller, as the original did.
    if first_error is not None:
        raise first_error
Пример #10
0
def pre_remove_hook():
    """Run the command configured in ``config.PRE_REMOVE_HOOK`` via ``os.system``.

    The command's exit status is not inspected.
    """
    hook_command = config.PRE_REMOVE_HOOK
    message = "Executing pre-remove hook: `{}`".format(hook_command)
    logger.debug(message)
    os.system(hook_command)
Пример #11
0
def upload_output_files(out_dir, upload_uri):
    """Copy ``out_dir`` to the backend and return the backend's listing.

    Args:
        out_dir: Local output directory to upload.
        upload_uri: Destination passed to the configured backend.

    Returns:
        The result of ``config.backend.list_uploaded(upload_uri)``.
    """
    message = "Upload output directory `{}` to `{}`".format(
        out_dir, upload_uri)
    logger.debug(message)

    backend = config.backend
    backend.copy_to_backend(out_dir, upload_uri)
    uploaded = backend.list_uploaded(upload_uri)
    return uploaded
Пример #12
0
def create_container(image, **kwargs):
    """Create a container from ``image`` and return its id.

    Args:
        image: Image name passed to ``client.create_container``.
        **kwargs: Passed through unchanged to ``client.create_container``.

    Returns:
        The ``'Id'`` entry of the value returned by the client.
    """
    template = "Creating container for image {} with arguments: {}"
    logger.debug(template.format(image, kwargs))
    return client.create_container(image, **kwargs)['Id']
Пример #13
0
def pull_image(image, *args, **kwargs):
    """Pull ``image`` through ``client.pull``.

    Args:
        image: Name of the image to pull.
        *args: Extra positional arguments forwarded to ``client.pull``.
        **kwargs: Extra keyword arguments forwarded to ``client.pull``.

    The value returned by ``client.pull`` is discarded.
    """
    logger.debug("Pulling image {}".format(image))
    client.pull(image, *args, **kwargs)