Exemplo n.º 1
0
Arquivo: node.py Projeto: bioid/brenda
    def task_loop():
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None
                task.proc = None
                task.retcode = None
                task.outdir = None
                task.id = 0

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.
                queuemsg = q.get_messages(message_attributes=['config'])

                # output some debug info
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if len(queuemsg) > 0:
                    task.msg = queuemsg[0]

                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory
                    task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # Create a config dictionary using combination of global and task-specific config values
                    taskconfig = conf.copy()
                    if 'config' in task.msg.message_attributes:
                        taskconfig.update(json.loads(task.msg.message_attributes['config']['string_value']))

                    # Store outdir in task config for later use
                    taskconfig['OUTDIR'] = task.outdir
                    if not 'BLENDER_FILE' in taskconfig:
                        taskconfig['BLENDER_FILE'] = '*.blend'

                    print "task-specific config:", taskconfig

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)


                    # do macro substitution on the task script
                    for k in taskconfig:
                        script = script.replace('$' + k, taskconfig[k])

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # Make sure we're working with the correct project directory
                    # FIXME - this is likely not the most efficient way of doing it, and probably leads to unnecessary
                    #         downloads from s3.  Ideally we would keep all project directories and switch between them,
                    #         but currently brenda only supports one working project directory at a time
                    proj_dir = get_project(taskconfig, taskconfig['BLENDER_PROJECT'])

                    # mount additional EBS volumes
                    aws.mount_additional_ebs(taskconfig, proj_dir)

                    # cd to project directory, where we will run blender from
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    for i, task in enumerate((local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (name,)
                                        if name == 'active':
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(opts, args, taskconfig, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    action = read_done_file()
                    if action == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    elif action == "smart":
                        now = time.time()
                        try:
                            instance_id = aws.get_instance_id_self()
                            spot_request_id = aws.get_spot_request_from_instance_id(conf, instance_id)
                            launch_time = aws.get_launch_time(conf, spot_request_id)
                            if launch_time:
                                spottime = aws.get_uptime(now, launch_time)
                                minutes_after_hour = (spottime / 60) % 60
                                print "Smart poll: ", minutes_after_hour
                                if minutes_after_hour >= smart_shutdown_threshold:
                                    print "Smart poll threshold passed, shutting down (%d minutes after the hour with no work in queue)" % (minutes_after_hour)
                                    # update the value of DONE config var for clean shutdown
                                    conf['DONE'] = 'shutdown'
                                    write_done_file()
                                    break;
                            else:
                                print "Smart poll: no launch_time for spot request %s" % (spot_request_id)
                        except Exception, e:
                            print "Smart poll failed!", e

                        time.sleep(15)
                    else:
                        break

        finally:
            cleanup_all()
Exemplo n.º 2
0
    def task_loop():
        """Run tasks from the SQS work queue until no work remains.

        Each pass of the outer loop reads one message with q.read().  If a
        message was returned, its body is written out as an executable
        script (./brenda-go) in proj_dir and run as a subprocess (the
        "active" task).  Once that script exits with status 0, an S3-push
        subprocess is started for the task's output directory and the loop
        goes back to read the queue again, so one active task and one push
        task may be running at the same time.  A message is deleted from
        the queue only after its push subprocess exits with status 0.

        When a pass ends with neither an active nor a push task, the loop
        sleeps 15 seconds and retries if read_done_file() returns "poll";
        otherwise it ends.

        Relies on names defined outside this block (local, conf, opts,
        args, work_dir, proj_dir, task_names, visibility_timeout,
        visibility_timeout_reassert, ...).

        Raises:
            error.ValueErrorRetry: the active task's script exited non-zero.
            ValueError: the S3-push subprocess exited non-zero.

        cleanup_all() is always called on the way out.
        """
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None
                task.proc = None
                task.retcode = None
                task.outdir = None
                task.id = 0

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.
                task.msg = q.read()

                # output some debug info
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if task.msg is not None:
                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory (any existing directory of
                    # the same name is removed first)
                    task.outdir = os.path.join(
                        work_dir, "brenda-outdir%d.tmp" % (task.id, ))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)

                    # do macro substitution on the task script
                    # ($OUTDIR is the only macro expanded here)
                    script = script.replace('$OUTDIR', task.outdir)

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # cd to project directory, where we will run blender from
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        # add execute permission for user, group and other
                        os.chmod(
                            script_fn, st.st_mode |
                            (stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (
                            os.path.realpath(script_fn), )
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                #
                # count goes up by one per one-second sleep below and is
                # reset after each reassert pass.
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    # index 0 is the active task, index 1 the push task;
                    # task_names[i] is compared against 'active' / 'push'
                    for i, task in enumerate(
                        (local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (
                                            name, )
                                        if name == 'active':
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(
                                            local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            # NOTE(review): a finished active task (proc is
                            # None) that is still waiting for the previous
                            # push to end is not reasserted here -- confirm
                            # its message cannot time out in that window.
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active
                         or local.task_active.proc is None) and
                        (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(
                        opts, args, conf, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    if read_done_file() == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    else:
                        break

        finally:
            # always runs, including when one of the errors above is raised
            cleanup_all()
Exemplo n.º 3
0
def status(opts, args, conf):
    q = aws.get_sqs_queue(conf)
    if q is not None:
        print "Queued tasks:", q.count()
Exemplo n.º 4
0
def status(opts, args, conf):
    q = aws.get_sqs_queue(conf)
    if q is not None:
        print "Queued tasks:", q.count()
Exemplo n.º 5
0
Arquivo: node.py Projeto: Anuga/brenda
    def task_loop():
        """Process SQS work-queue tasks until the queue yields no more work.

        Per outer-loop pass: read one message via q.read(); if present,
        write its body to ./brenda-go inside proj_dir, make it executable
        and start it as the "active" subprocess.  The inner loop then polls
        the active subprocess and any S3-push subprocess left over from the
        previous pass once per second.  After the active subprocess exits
        with status 0, start_s3_push_process() is launched for its output
        directory and the task becomes the push task of the next pass.  The
        SQS message is deleted only when that push subprocess exits with
        status 0.

        With neither task pending at the end of a pass, the loop ends
        unless read_done_file() returns "poll", in which case it sleeps 15
        seconds and reads the queue again.

        Names such as local, conf, opts, args, work_dir, proj_dir,
        task_names, visibility_timeout and visibility_timeout_reassert
        come from outside this block.

        Raises:
            error.ValueErrorRetry: the active subprocess exited non-zero.
            ValueError: the S3-push subprocess exited non-zero.

        cleanup_all() runs in the finally clause on every exit path.
        """
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None
                task.proc = None
                task.retcode = None
                task.outdir = None
                task.id = 0

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.
                task.msg = q.read()

                # output some debug info
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if task.msg is not None:
                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory (removing any existing
                    # directory of the same name first)
                    task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)

                    # do macro substitution on the task script
                    # (only $OUTDIR is expanded in this variant)
                    script = script.replace('$OUTDIR', task.outdir)

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # cd to project directory, where we will run blender from
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        # add execute permission for user, group and other
                        os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                #
                # count is incremented once per one-second sleep and reset
                # after every pass in which reassert was true.
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    # position 0 is the active task, position 1 the push
                    # task; task_names[i] is matched against 'active'/'push'
                    for i, task in enumerate((local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (name,)
                                        if name == 'active':
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            # NOTE(review): only tasks with a live process
                            # are reasserted; a finished active task waiting
                            # on the previous push is skipped -- confirm its
                            # message cannot time out in that window.
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(opts, args, conf, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    if read_done_file() == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    else:
                        break

        finally:
            # always runs, including when one of the errors above is raised
            cleanup_all()