always_refetch = int(conf.get('BLENDER_PROJECT_ALWAYS_REFETCH', '0'))
etag = None
if not always_refetch:
    try:
        with open(os.path.join(proj_dir, fn + '.etag')) as efn:
            etag = efn.read().strip()
    except Exception:
        pass

# create new directory to download project
new_dir = proj_dir + '.pre.tmp'
utils.rmtree(new_dir)
utils.mkdir(new_dir)
try:
    with utils.Cd(new_dir) as cd:
        # download the file from S3
        file_len, etag = aws.s3_get(conf, s3url, fn, etag=etag)

        # save the etag for future reference
        with open(fn + '.etag', 'w') as efn:
            efn.write(etag + '\n')

        # Use "unzip" tool for .zip files,
        # and "tar xf" for everything else.
        if fn.lower().endswith('.zip'):
            utils.system(["unzip", fn])
        else:
            utils.system(["tar", "xf", fn])
        utils.rm(fn)
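

# The block above caches the S3 object's ETag next to the project directory so
# that an unchanged project archive is not downloaded and unpacked again.  A
# minimal sketch of how such an etag short-circuit could be implemented with
# boto 2.x (an illustrative assumption, not necessarily how aws.s3_get does it;
# the helper name and arguments are hypothetical):
def s3_get_sketch(bucket_name, key_name, dest_fn, etag=None):
    import boto
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    key = bucket.get_key(key_name)
    remote_etag = key.etag.strip('"')  # boto reports the ETag quoted
    if etag is not None and etag == remote_etag:
        # cached copy is still current -- caller may skip re-extracting
        return None, remote_etag
    key.get_contents_to_filename(dest_fn)
    return key.size, remote_etag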
def task_loop():
    try:
        # reset tasks
        local.task_active = None
        local.task_push = None

        # get SQS work queue
        q = aws.get_sqs_queue(conf)

        # Loop over tasks.  There are up to two different tasks at any
        # given moment that we are processing concurrently:
        #
        # 1. Active task -- usually a blender render operation.
        # 2. S3 push task -- a task which pushes the products of the
        #                    previous active task (such as rendered
        #                    frames) to S3.
        while True:
            # reset active task
            local.task_active = None

            # initialize active task object
            task = State()
            task.msg = None
            task.proc = None
            task.retcode = None
            task.outdir = None
            task.id = 0

            # Get a task from the SQS work queue.  This is normally
            # a short script that runs blender to render one
            # or more frames.
            task.msg = q.read()

            # output some debug info
            print "queue read:", task.msg
            if local.task_push:
                print "push task:", local.task_push.__dict__
            else:
                print "no task push task"

            # process task
            if task.msg is not None:
                # assign an ID to task
                local.task_id_counter += 1
                task.id = local.task_id_counter

                # register active task
                local.task_active = task

                # create output directory
                task.outdir = os.path.join(
                    work_dir, "brenda-outdir%d.tmp" % (task.id,))
                utils.rmtree(task.outdir)
                utils.mkdir(task.outdir)

                # get the task script
                script = task.msg.get_body()
                print "script len:", len(script)

                # do macro substitution on the task script
                script = script.replace('$OUTDIR', task.outdir)

                # add shebang if absent
                if not script.startswith("#!"):
                    script = "#!/bin/bash\n" + script

                # cd to project directory, where we will run blender from
                with utils.Cd(proj_dir) as cd:
                    # write script file and make it executable
                    script_fn = "./brenda-go"
                    with open(script_fn, 'w') as f:
                        f.write(script)
                    st = os.stat(script_fn)
                    os.chmod(script_fn,
                             st.st_mode | (stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH))

                    # run the script
                    print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                    print script,
                    print "--------------------------"
                    task.proc = Subprocess([script_fn])

                print "active task:", local.task_active.__dict__

            # Wait for active and S3-push tasks to complete,
            # while periodically reasserting with SQS to
            # acknowledge that tasks are still pending.
            # (If we don't reassert with SQS frequently enough,
            # it will assume we died, and put our tasks back
            # in the queue.  "frequently enough" means within
            # visibility_timeout.)
            count = 0
            while True:
                reassert = (count >= visibility_timeout_reassert)
                for i, task in enumerate((local.task_active, local.task_push)):
                    if task:
                        name = task_names[i]
                        if task.proc is not None:
                            # test if process has finished
                            task.retcode = task.proc.poll()
                            if task.retcode is not None:
                                # process has finished
                                task.proc = None

                                # did process finish with errors?
                                if task.retcode != 0:
                                    errtxt = "fatal error in %s task" % (name,)
                                    if name == 'active':
                                        raise error.ValueErrorRetry(errtxt)
                                    else:
                                        raise ValueError(errtxt)

                                # Process finished successfully.  If S3-push process,
                                # tell SQS that the task completed successfully.
                                if name == 'push':
                                    print "******* TASK", task.id, "COMMITTED to S3"
                                    q.delete_message(task.msg)
                                    task.msg = None
                                    local.task_count += 1
                                    task_complete_accounting(local.task_count)

                                # active task completed?
                                if name == 'active':
                                    print "******* TASK", task.id, "READY-FOR-PUSH"

                        # tell SQS that we are still working on the task
                        if reassert and task.proc is not None:
                            print "******* REASSERT", name, task.id
                            task.msg.change_visibility(visibility_timeout)

                # break out of loop only when no pending tasks remain
                if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                    break

                # setup for next process poll iteration
                if reassert:
                    count = 0
                time.sleep(1)
                count += 1

            # clean up the S3-push task
            cleanup(local.task_push, 'push')
            local.task_push = None

            # start a concurrent push task to commit files generated by
            # just-completed active task (such as blender render frames) to S3
            if local.task_active:
                local.task_active.proc = start_s3_push_process(
                    opts, args, conf, local.task_active.outdir)
                local.task_push = local.task_active
                local.task_active = None

            # if no active task and no S3-push task, we are done
            # (unless DONE is set to "poll")
            if not local.task_active and not local.task_push:
                if read_done_file() == "poll":
                    print "Polling for more work..."
                    time.sleep(15)
                else:
                    break

    finally:
        cleanup_all()
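

# task_loop() above relies on a State attribute container and a Subprocess
# wrapper with a poll() method.  A minimal sketch of what those helpers could
# look like (an assumption for illustration -- the real definitions elsewhere
# in the module may differ):
import subprocess


class State(object):
    """Bare container for per-task bookkeeping attributes."""
    pass


class Subprocess(object):
    """Thin non-blocking wrapper around subprocess.Popen."""
    def __init__(self, args):
        self.p = subprocess.Popen(args)

    def poll(self):
        # None while the child is still running, otherwise its exit code
        return self.p.poll()

    def stop(self):
        self.p.terminate()
        self.p.wait()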
def task_loop():
    try:
        # reset tasks
        local.task_render = None
        local.task_upload = None

        # get SQS work queue
        q = aws.get_sqs_conn_queue(conf)[0]

        # Loop over tasks.  There are up to two different tasks at any
        # given moment that we are processing concurrently:
        #
        # 1. Render task -- usually a render operation.
        # 2. Upload task -- a task which uploads results to S3.
        while True:
            # reset render task
            local.task_render = None

            # initialize render task object
            task = State()
            task.msg = None
            task.proc = None
            task.retcode = None
            task.outdir = None
            task.id = 0
            task.script_name = None

            # Get a task from the SQS work queue.  This is normally
            # a short script that renders one or more frames.
            task.msg = q.read(message_attributes=['All'])

            # output some debug info
            logging.debug('Reading work queue')
            if local.task_upload:
                logging.info("Running upload task #%d", local.task_upload.id)
                logging.debug(local.task_upload.__dict__)
            else:
                logging.info('No upload task available')

            # process task
            if task.msg is not None:
                # assign an ID to task
                local.task_id_counter += 1
                task.id = local.task_id_counter
                task.script_name = task.msg.message_attributes['script_name']['string_value']

                # register render task
                local.task_render = task

                # create output directory
                task.outdir = os.path.join(
                    work_dir, "{}_out_{}".format(task.script_name, task.id))
                utils.rmtree(task.outdir)
                utils.mkdir(task.outdir)

                # get the task script
                script = task.msg.get_body()

                # cd to output directory, where we will run render task from
                with utils.Cd(task.outdir):
                    # write script file and make it executable
                    script_fn = "./{}".format(task.script_name)
                    with open(script_fn, 'w') as f:
                        f.write(script)
                    st = os.stat(script_fn)
                    os.chmod(script_fn,
                             st.st_mode | (stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH))

                    # run the script
                    task.proc = Subprocess([script_fn])

                logging.info('Running render task "%s #%d"',
                             local.task_render.script_name, local.task_render.id)
                logging.info(script.replace("\n", " "))
                logging.debug(local.task_render.__dict__)

            # Wait for render & upload tasks to complete, while periodically
            # reasserting with SQS to acknowledge that tasks are still pending.
            # (If we don't reassert with SQS frequently enough, it will assume
            # we died, and put our tasks back in the queue.  "frequently enough"
            # means within visibility_timeout.)
            count = 0
            while True:
                reassert = (count >= visibility_timeout_reassert)
                for i, task in enumerate((local.task_render, local.task_upload)):
                    if task:
                        name = task_names[i]
                        if task.proc is not None:
                            # test if process has finished
                            task.retcode = task.proc.poll()
                            if task.retcode is not None:
                                # process has finished
                                task.proc = None

                                # did process finish with errors?
                                if task.retcode != 0:
                                    if name == 'render':
                                        errtxt = "Render task \"{} #{}\" exited with status code {}".format(
                                            task.script_name, task.id, task.retcode)
                                        raise error.ValueErrorRetry(errtxt)
                                    else:
                                        errtxt = "Upload task #{} exited with status code {}".format(
                                            task.id, task.retcode)
                                        raise ValueError(errtxt)

                                # Process finished successfully.  If upload process,
                                # tell SQS that the task completed successfully.
                                if name == 'upload':
                                    logging.info('Finished upload task #%d', task.id)
                                    q.delete_message(task.msg)
                                    task.msg = None
                                    local.task_count += 1
                                    task_complete_accounting(local.task_count)

                                # Render task completed?
                                if name == 'render':
                                    logging.info('Finished render task "%s #%d"',
                                                 task.script_name, task.id)

                        # tell SQS that we are still working on the task
                        if reassert and task.proc is not None:
                            logging.debug('Reasserting %s task %d with SQS', name, task.id)
                            task.msg.change_visibility(visibility_timeout)

                # break out of loop only when no pending tasks remain
                if ((not local.task_render or local.task_render.proc is None)
                        and (not local.task_upload or local.task_upload.proc is None)):
                    break

                # setup for next process poll iteration
                if reassert:
                    count = 0
                time.sleep(1)
                count += 1

            # clean up the upload task
            cleanup(local.task_upload, 'upload')
            local.task_upload = None

            # start a concurrent upload task to commit files generated by
            # just-completed render task to S3
            if local.task_render:
                local.task_render.proc = start_upload_process(
                    opts, args, conf, local.task_render)
                local.task_upload = local.task_render
                local.task_render = None

            # if no render or upload task, we are done (unless DONE is set to "poll")
            if not local.task_render and not local.task_upload:
                if read_done_file() == "poll":
                    logging.info('Waiting for tasks...')
                    time.sleep(15)
                else:
                    logging.info('Exiting')
                    break

    finally:
        cleanup_all()
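

# The render task_loop() above expects every SQS message to carry a
# 'script_name' message attribute alongside the script body, which it fetches
# via q.read(message_attributes=['All']).  A minimal sketch of the producer
# side, assuming a boto 2.x release with SQS message-attribute support (the
# function name and argument values are hypothetical placeholders):
from boto.sqs.message import Message


def push_task_sketch(q, script_text, script_name):
    m = Message()
    m.set_body(script_text)
    m.message_attributes = {
        'script_name': {
            'data_type': 'String',
            'string_value': script_name,
        }
    }
    q.write(m)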