def task_loop():
    """Main worker loop for a render node.

    Repeatedly pulls task scripts from the SQS work queue and runs up to
    two tasks concurrently:

      1. Active task -- usually a blender render operation.
      2. S3 push task -- pushes the products of the previous active task
         (such as rendered frames) to S3.

    Exits when the DONE file says to stop, or -- in "smart" mode -- when a
    spot instance has passed ``smart_shutdown_threshold`` minutes past the
    hour with no work queued.  Always runs ``cleanup_all()`` on exit.

    NOTE(review): relies on module/outer-scope state not visible in this
    block (``local``, ``conf``, ``work_dir``, ``opts``, ``args``,
    ``visibility_timeout``, ``task_names``, helper functions, ...) --
    confirm against the rest of the file.
    """
    try:
        # reset tasks
        local.task_active = None
        local.task_push = None

        # get SQS work queue
        q = aws.get_sqs_queue(conf)

        # Loop over tasks.  There are up to two different tasks at any
        # given moment that we are processing concurrently:
        #
        # 1. Active task -- usually a blender render operation.
        # 2. S3 push task -- a task which pushes the products of the
        #                    previous active task (such as rendered
        #                    frames) to S3.
        while True:
            # reset active task
            local.task_active = None

            # initialize active task object (State is a plain attribute bag)
            task = State()
            task.msg = None
            task.proc = None
            task.retcode = None
            task.outdir = None
            task.id = 0

            # Get a task from the SQS work queue.  This is normally
            # a short script that runs blender to render one
            # or more frames.  We request the 'config' message attribute
            # so a task can carry task-specific config overrides.
            queuemsg = q.get_messages(message_attributes=['config'])

            # output some debug info
            # NOTE(review): task.msg is still None here (it is assigned
            # below from queuemsg), so this always prints None -- probably
            # meant to print queuemsg; confirm intent.
            print "queue read:", task.msg
            if local.task_push:
                print "push task:", local.task_push.__dict__
            else:
                print "no task push task"

            # process task (only if the queue actually returned a message)
            if len(queuemsg) > 0:
                task.msg = queuemsg[0]
                # assign an ID to task
                local.task_id_counter += 1
                task.id = local.task_id_counter

                # register active task
                local.task_active = task

                # create output directory (remove any stale one first)
                task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                utils.rmtree(task.outdir)
                utils.mkdir(task.outdir)

                # Create a config dictionary using combination of global and
                # task-specific config values; task values win on conflict.
                taskconfig = conf.copy()
                if 'config' in task.msg.message_attributes:
                    taskconfig.update(json.loads(task.msg.message_attributes['config']['string_value']))
                # Store outdir in task config for later use
                taskconfig['OUTDIR'] = task.outdir
                if not 'BLENDER_FILE' in taskconfig:
                    taskconfig['BLENDER_FILE'] = '*.blend'
                print "task-specific config:", taskconfig

                # get the task script
                script = task.msg.get_body()
                print "script len:", len(script)

                # do macro substitution on the task script:
                # every occurrence of "$KEY" is replaced by taskconfig[KEY].
                # NOTE(review): assumes every taskconfig value is a string
                # (str.replace would raise TypeError otherwise) -- confirm.
                for k in taskconfig:
                    script = script.replace('$' + k, taskconfig[k])

                # add shebang if absent
                if not script.startswith("#!"):
                    script = "#!/bin/bash\n" + script

                # Make sure we're working with the correct project directory
                # FIXME - this is likely not the most efficient way of doing
                # it, and probably leads to unnecessary downloads from s3.
                # Ideally we would keep all project directories and switch
                # between them, but currently brenda only supports one
                # working project directory at a time
                proj_dir = get_project(taskconfig, taskconfig['BLENDER_PROJECT'])

                # mount additional EBS volumes
                aws.mount_additional_ebs(taskconfig, proj_dir)

                # cd to project directory, where we will run blender from
                with utils.Cd(proj_dir) as cd:
                    # write script file and make it executable
                    script_fn = "./brenda-go"
                    with open(script_fn, 'w') as f:
                        f.write(script)
                    st = os.stat(script_fn)
                    os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                    # run the script asynchronously; completion is detected
                    # by polling below
                    print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                    print script,
                    print "--------------------------"
                    task.proc = Subprocess([script_fn])

                print "active task:", local.task_active.__dict__

            # Wait for active and S3-push tasks to complete,
            # while periodically reasserting with SQS to
            # acknowledge that tasks are still pending.
            # (If we don't reassert with SQS frequently enough, it
            # will assume we died, and put our tasks back in the
            # queue.  "frequently enough" means within
            # visibility_timeout.)
            count = 0
            while True:
                reassert = (count >= visibility_timeout_reassert)
                for i, task in enumerate((local.task_active, local.task_push)):
                    if task:
                        name = task_names[i]
                        if task.proc is not None:
                            # test if process has finished
                            task.retcode = task.proc.poll()
                            if task.retcode is not None:
                                # process has finished
                                task.proc = None

                                # did process finish with errors?
                                # active-task failures are retryable (the
                                # SQS message stays queued); push failures
                                # are fatal.
                                if task.retcode != 0:
                                    errtxt = "fatal error in %s task" % (name,)
                                    if name == 'active':
                                        raise error.ValueErrorRetry(errtxt)
                                    else:
                                        raise ValueError(errtxt)

                                # Process finished successfully.  If S3-push process,
                                # tell SQS that the task completed successfully.
                                if name == 'push':
                                    print "******* TASK", task.id, "COMMITTED to S3"
                                    q.delete_message(task.msg)
                                    task.msg = None
                                    local.task_count += 1
                                    task_complete_accounting(local.task_count)

                                # active task completed?
                                if name == 'active':
                                    print "******* TASK", task.id, "READY-FOR-PUSH"

                        # tell SQS that we are still working on the task
                        if reassert and task.proc is not None:
                            print "******* REASSERT", name, task.id
                            task.msg.change_visibility(visibility_timeout)

                # break out of loop only when no pending tasks remain
                if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                    break

                # setup for next process poll iteration
                if reassert:
                    count = 0
                time.sleep(1)
                count += 1

            # clean up the S3-push task
            cleanup(local.task_push, 'push')
            local.task_push = None

            # start a concurrent push task to commit files generated by
            # just-completed active task (such as blender render frames) to S3
            if local.task_active:
                local.task_active.proc = start_s3_push_process(opts, args, taskconfig, local.task_active.outdir)
                local.task_push = local.task_active
                local.task_active = None

            # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
            if not local.task_active and not local.task_push:
                action = read_done_file()
                if action == "poll":
                    print "Polling for more work..."
                    time.sleep(15)
                elif action == "smart":
                    # "smart" mode: keep polling, but shut the spot
                    # instance down once we are past the threshold number
                    # of minutes after the (billing) hour with no work.
                    now = time.time()
                    try:
                        instance_id = aws.get_instance_id_self()
                        spot_request_id = aws.get_spot_request_from_instance_id(conf, instance_id)
                        launch_time = aws.get_launch_time(conf, spot_request_id)
                        if launch_time:
                            spottime = aws.get_uptime(now, launch_time)
                            # Python 2 integer division: whole minutes of
                            # uptime, modulo 60 -> minutes past the hour.
                            minutes_after_hour = (spottime / 60) % 60
                            print "Smart poll: ", minutes_after_hour
                            if minutes_after_hour >= smart_shutdown_threshold:
                                print "Smart poll threshold passed, shutting down (%d minutes after the hour with no work in queue)" % (minutes_after_hour)
                                # update the value of DONE config var for clean shutdown
                                conf['DONE'] = 'shutdown'
                                write_done_file()
                                break;
                        else:
                            print "Smart poll: no launch_time for spot request %s" % (spot_request_id)
                    except Exception, e:
                        # best-effort: a failed smart poll just falls back
                        # to plain polling
                        print "Smart poll failed!", e
                    time.sleep(15)
                else:
                    break
    finally:
        cleanup_all()
def task_loop():
    """Worker loop for a render node (simple variant).

    Pulls task scripts from the SQS work queue one at a time and runs up
    to two tasks concurrently:

      1. Active task -- usually a blender render operation.
      2. S3 push task -- pushes the products of the previous active task
         (such as rendered frames) to S3.

    Exits when both task slots are empty and the DONE file does not say
    "poll".  Always runs ``cleanup_all()`` on exit.

    NOTE(review): relies on module/outer-scope state not visible in this
    block (``local``, ``conf``, ``work_dir``, ``proj_dir``, ``opts``,
    ``args``, ``visibility_timeout``, ``task_names``, helpers, ...) --
    confirm against the rest of the file.
    """
    try:
        # reset tasks
        local.task_active = None
        local.task_push = None

        # get SQS work queue
        q = aws.get_sqs_queue(conf)

        # Loop over tasks.  There are up to two different tasks at any
        # given moment that we are processing concurrently:
        #
        # 1. Active task -- usually a blender render operation.
        # 2. S3 push task -- a task which pushes the products of the
        #                    previous active task (such as rendered
        #                    frames) to S3.
        while True:
            # reset active task
            local.task_active = None

            # initialize active task object (State is a plain attribute bag)
            task = State()
            task.msg = None
            task.proc = None
            task.retcode = None
            task.outdir = None
            task.id = 0

            # Get a task from the SQS work queue.  This is normally
            # a short script that runs blender to render one
            # or more frames.
            task.msg = q.read()

            # output some debug info
            print "queue read:", task.msg
            if local.task_push:
                print "push task:", local.task_push.__dict__
            else:
                print "no task push task"

            # process task (only if the queue returned a message)
            if task.msg is not None:
                # assign an ID to task
                local.task_id_counter += 1
                task.id = local.task_id_counter

                # register active task
                local.task_active = task

                # create output directory (remove any stale one first)
                task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                utils.rmtree(task.outdir)
                utils.mkdir(task.outdir)

                # get the task script
                script = task.msg.get_body()
                print "script len:", len(script)

                # do macro substitution on the task script:
                # "$OUTDIR" expands to this task's output directory
                script = script.replace('$OUTDIR', task.outdir)

                # add shebang if absent
                if not script.startswith("#!"):
                    script = "#!/bin/bash\n" + script

                # cd to project directory, where we will run blender from
                with utils.Cd(proj_dir) as cd:
                    # write script file and make it executable
                    script_fn = "./brenda-go"
                    with open(script_fn, 'w') as f:
                        f.write(script)
                    st = os.stat(script_fn)
                    os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                    # run the script asynchronously; completion is detected
                    # by polling below
                    print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                    print script,
                    print "--------------------------"
                    task.proc = Subprocess([script_fn])

                print "active task:", local.task_active.__dict__

            # Wait for active and S3-push tasks to complete,
            # while periodically reasserting with SQS to
            # acknowledge that tasks are still pending.
            # (If we don't reassert with SQS frequently enough, it
            # will assume we died, and put our tasks back in the
            # queue.  "frequently enough" means within
            # visibility_timeout.)
            count = 0
            while True:
                reassert = (count >= visibility_timeout_reassert)
                for i, task in enumerate((local.task_active, local.task_push)):
                    if task:
                        name = task_names[i]
                        if task.proc is not None:
                            # test if process has finished
                            task.retcode = task.proc.poll()
                            if task.retcode is not None:
                                # process has finished
                                task.proc = None

                                # did process finish with errors?
                                # active-task failures are retryable; push
                                # failures are fatal.
                                if task.retcode != 0:
                                    errtxt = "fatal error in %s task" % (name,)
                                    if name == 'active':
                                        raise error.ValueErrorRetry(errtxt)
                                    else:
                                        raise ValueError(errtxt)

                                # Process finished successfully.  If S3-push process,
                                # tell SQS that the task completed successfully.
                                if name == 'push':
                                    print "******* TASK", task.id, "COMMITTED to S3"
                                    q.delete_message(task.msg)
                                    task.msg = None
                                    local.task_count += 1
                                    task_complete_accounting(local.task_count)

                                # active task completed?
                                if name == 'active':
                                    print "******* TASK", task.id, "READY-FOR-PUSH"

                        # tell SQS that we are still working on the task
                        if reassert and task.proc is not None:
                            print "******* REASSERT", name, task.id
                            task.msg.change_visibility(visibility_timeout)

                # break out of loop only when no pending tasks remain
                if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                    break

                # setup for next process poll iteration
                if reassert:
                    count = 0
                time.sleep(1)
                count += 1

            # clean up the S3-push task
            cleanup(local.task_push, 'push')
            local.task_push = None

            # start a concurrent push task to commit files generated by
            # just-completed active task (such as blender render frames) to S3
            if local.task_active:
                local.task_active.proc = start_s3_push_process(opts, args, conf, local.task_active.outdir)
                local.task_push = local.task_active
                local.task_active = None

            # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
            if not local.task_active and not local.task_push:
                if read_done_file() == "poll":
                    print "Polling for more work..."
                    time.sleep(15)
                else:
                    break
    finally:
        cleanup_all()
def status(opts, args, conf): q = aws.get_sqs_queue(conf) if q is not None: print "Queued tasks:", q.count()
def task_loop():
    """Render-node work loop: fetch task scripts from SQS and execute them.

    Runs up to two tasks at once -- the active task (typically a blender
    render) and an S3-push task committing the previous task's output --
    and keeps reasserting SQS message visibility while either is running.
    Exits when both slots are empty and the DONE file does not say "poll";
    always runs ``cleanup_all()`` on the way out.

    NOTE(review): this appears to be a duplicate of an earlier
    ``task_loop`` definition in this file; if both live in the same
    module, the one bound last at import time wins -- confirm which
    variant is intended to be live.

    NOTE(review): depends on module/outer-scope state not visible here
    (``local``, ``conf``, ``work_dir``, ``proj_dir``, ``opts``, ``args``,
    ``visibility_timeout``, ``task_names``, helper functions, ...).
    """
    try:
        # reset tasks
        local.task_active = None
        local.task_push = None

        # get SQS work queue
        q = aws.get_sqs_queue(conf)

        # Loop over tasks.  There are up to two different tasks at any
        # given moment that we are processing concurrently:
        #
        # 1. Active task -- usually a blender render operation.
        # 2. S3 push task -- a task which pushes the products of the
        #                    previous active task (such as rendered
        #                    frames) to S3.
        while True:
            # reset active task
            local.task_active = None

            # initialize active task object (State is a plain attribute bag)
            task = State()
            task.msg = None
            task.proc = None
            task.retcode = None
            task.outdir = None
            task.id = 0

            # Get a task from the SQS work queue.  This is normally
            # a short script that runs blender to render one
            # or more frames.
            task.msg = q.read()

            # output some debug info
            print "queue read:", task.msg
            if local.task_push:
                print "push task:", local.task_push.__dict__
            else:
                print "no task push task"

            # process task (only if the queue returned a message)
            if task.msg is not None:
                # assign an ID to task
                local.task_id_counter += 1
                task.id = local.task_id_counter

                # register active task
                local.task_active = task

                # create output directory (remove any stale one first)
                task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                utils.rmtree(task.outdir)
                utils.mkdir(task.outdir)

                # get the task script
                script = task.msg.get_body()
                print "script len:", len(script)

                # do macro substitution on the task script:
                # "$OUTDIR" expands to this task's output directory
                script = script.replace('$OUTDIR', task.outdir)

                # add shebang if absent
                if not script.startswith("#!"):
                    script = "#!/bin/bash\n" + script

                # cd to project directory, where we will run blender from
                with utils.Cd(proj_dir) as cd:
                    # write script file and make it executable
                    script_fn = "./brenda-go"
                    with open(script_fn, 'w') as f:
                        f.write(script)
                    st = os.stat(script_fn)
                    os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                    # run the script asynchronously; completion is detected
                    # by polling below
                    print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                    print script,
                    print "--------------------------"
                    task.proc = Subprocess([script_fn])

                print "active task:", local.task_active.__dict__

            # Wait for active and S3-push tasks to complete,
            # while periodically reasserting with SQS to
            # acknowledge that tasks are still pending.
            # (If we don't reassert with SQS frequently enough, it
            # will assume we died, and put our tasks back in the
            # queue.  "frequently enough" means within
            # visibility_timeout.)
            count = 0
            while True:
                reassert = (count >= visibility_timeout_reassert)
                for i, task in enumerate((local.task_active, local.task_push)):
                    if task:
                        name = task_names[i]
                        if task.proc is not None:
                            # test if process has finished
                            task.retcode = task.proc.poll()
                            if task.retcode is not None:
                                # process has finished
                                task.proc = None

                                # did process finish with errors?
                                # active-task failures are retryable; push
                                # failures are fatal.
                                if task.retcode != 0:
                                    errtxt = "fatal error in %s task" % (name,)
                                    if name == 'active':
                                        raise error.ValueErrorRetry(errtxt)
                                    else:
                                        raise ValueError(errtxt)

                                # Process finished successfully.  If S3-push process,
                                # tell SQS that the task completed successfully.
                                if name == 'push':
                                    print "******* TASK", task.id, "COMMITTED to S3"
                                    q.delete_message(task.msg)
                                    task.msg = None
                                    local.task_count += 1
                                    task_complete_accounting(local.task_count)

                                # active task completed?
                                if name == 'active':
                                    print "******* TASK", task.id, "READY-FOR-PUSH"

                        # tell SQS that we are still working on the task
                        if reassert and task.proc is not None:
                            print "******* REASSERT", name, task.id
                            task.msg.change_visibility(visibility_timeout)

                # break out of loop only when no pending tasks remain
                if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                    break

                # setup for next process poll iteration
                if reassert:
                    count = 0
                time.sleep(1)
                count += 1

            # clean up the S3-push task
            cleanup(local.task_push, 'push')
            local.task_push = None

            # start a concurrent push task to commit files generated by
            # just-completed active task (such as blender render frames) to S3
            if local.task_active:
                local.task_active.proc = start_s3_push_process(opts, args, conf, local.task_active.outdir)
                local.task_push = local.task_active
                local.task_active = None

            # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
            if not local.task_active and not local.task_push:
                if read_done_file() == "poll":
                    print "Polling for more work..."
                    time.sleep(15)
                else:
                    break
    finally:
        cleanup_all()