import os
import shutil
import time
import traceback
import zipfile

from boto.s3.key import Key

# Project-local helpers used below (items_log, tasks_log, s3, sqs, zipper,
# timeout, job_item_manager, client_constants, and constants such as
# S3_WORKING_INPUT_BUCKET, SQS_JOB_ITEMS_QUEUE, PLUGINS, and
# OUTPUT_FILE_SUFFIXES) are assumed to be imported or defined elsewhere
# in this package.


def create_job_items_from_directory(job_item, dir_path):
    items_log.info("Creating job items from directory")
    fileslist = []
    for (dirpath, dirnames, filenames) in os.walk(dir_path):
        for f in filenames:
            fileslist.append({
                "filename": f,
                "file_path": os.path.join(dirpath, f),
            })
    for f in fileslist:
        s3.upload_file(f['filename'], f['file_path'], S3_WORKING_INPUT_BUCKET)
        create_job_item(job_item['job_id'], f['filename'],
                        sqs.get_queue(SQS_JOB_ITEMS_QUEUE))

def cleanup_all(list_of_filenames):
    filelist = [f for f in os.listdir(".") if f.endswith(".swc")]
    loglist = [f for f in os.listdir(".") if f.endswith("log.txt")]
    reconstructlist = [f for f in os.listdir(".")
                       if f.startswith("tmp_binarized_Reconstruction")]
    filelist.extend(loglist)
    # Presumed intent: temp reconstruction files are removed along with the
    # .swc and log files.
    filelist.extend(reconstructlist)
    filelist.extend(list_of_filenames)
    print "Files " + str(filelist)
    for f in filelist:
        try:
            os.remove(os.path.abspath(f))
        except Exception as e:
            items_log.info("File to remove not found " + str(e))

def process_next_job_item():
    tasks_log.info("Getting next job_item from queue")
    queue = sqs.get_queue(client_constants.SQS_JOB_ITEMS_QUEUE)
    msg = sqs.get_next_message(queue)
    if msg is None:
        tasks_log.info("No job items found in Queue")
        return
    job_item_key = msg['MessageAttributes']['job_item_key']['StringValue']
    tasks_log.info("Found new job_item " + job_item_key)
    job_item = job_item_manager.get_job_item_doc(job_item_key)
    job_item['attempts'] += 1
    status = job_item_manager.process_job_item(job_item)
    if status == "COMPLETE":
        items_log.info("Deleting completed job_item from queue")
        sqs.delete_message(queue, msg)
    else:
        # We are going to let SQS handle retries
        items_log.info("Leaving job_item in queue")

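# A minimal worker-loop sketch showing one way process_next_job_item could be
# driven. The function name and the 30-second poll interval are illustrative
# assumptions, not part of this codebase.
def poll_job_items(poll_interval_sec=30):
    while True:
        process_next_job_item()
        time.sleep(poll_interval_sec)
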
def get_timeout(file_bytes, bytes_per_sec, max_time, min_time, buffer_multiplier):
    """ Returns estimated job item runtime w buffer, clamped between min and max """
    items_log.info("buffer " + str(buffer_multiplier))
    items_log.info("Filesize in MB: " + str(file_bytes / BYTES_PER_MEGABYTE))
    estimated_runtime = file_bytes / bytes_per_sec
    items_log.info("Est Runtime: " + str(int(estimated_runtime)))
    timeout = int(estimated_runtime * buffer_multiplier)
    items_log.info("Est Runtime w buffer: " + str(int(timeout)))
    # Clamp the buffered estimate to the [min_time, max_time] range, per the
    # docstring.
    return max(min_time, min(timeout, max_time))

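# A minimal usage sketch of get_timeout; the numbers are illustrative
# assumptions, not values from this codebase. A 50 MB file processed at
# 100,000 bytes/sec estimates to ~524 s, the 1.5x buffer raises that to
# ~786 s, and the result is clamped to the 600 s ceiling.
example_timeout = get_timeout(file_bytes=50 * 1048576,
                              bytes_per_sec=100000,
                              max_time=600,
                              min_time=300,
                              buffer_multiplier=1.5)
# example_timeout == 600
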
def run_job(job, max_runtime):
    items_log.info("Tracing neuron... " + job['input_filename'])
    input_file_path = os.path.abspath(job['input_filename'])
    output_file_path = os.path.abspath(job['output_filename'])
    log_file_path = output_file_path + USER_JOB_LOG_EXT
    logfile = open(log_file_path, "w")
    cmd_args = build_cmd_args(job, input_file_path, output_file_path)
    items_log.info("Running Command: " + " ".join(cmd_args))
    start_time = int(time.time())
    cmd = Command(cmd_args, logfile)
    print "Running " + str(" ".join(cmd_args))
    try:
        status = cmd.run(max_runtime)
        runtime = int(time.time()) - start_time
        if status == "OK":
            ok_msg = "\nTrace complete! Runtime = " + str(runtime) + " seconds"
            logfile.write("\n" + ok_msg)
            items_log.info(ok_msg)
        elif status == "TIMEOUT":
            max_runtime_msg = (
                job['input_filename'] + " - " + job['plugin'] +
                " Throwing Exception b/c Max Runtime Exceeded: " +
                str(max_runtime) + " seconds")
            logfile.write("\n" + max_runtime_msg)
            items_log.info(max_runtime_msg)
            raise MaxRuntimeException(max_runtime_msg)
        else:
            job_failed_msg = "Throwing Exception b/c Job Item Failed: " + input_file_path
            logfile.write(job_failed_msg)
            raise Exception(job_failed_msg)
    finally:
        logfile.close()

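# A minimal sketch of a Command wrapper compatible with run_job above,
# assuming its contract is: run the external tracer via subprocess, stream
# output to the supplied logfile, and return "OK" on a zero exit code,
# "TIMEOUT" if max_runtime elapses, or "FAIL" otherwise. This is an assumed
# shape, not the project's actual class.
import subprocess


class Command(object):
    def __init__(self, cmd_args, logfile):
        self.cmd_args = cmd_args
        self.logfile = logfile

    def run(self, max_runtime):
        # Send both stdout and stderr to the job's logfile.
        proc = subprocess.Popen(self.cmd_args, stdout=self.logfile,
                                stderr=subprocess.STDOUT)
        deadline = time.time() + max_runtime
        while proc.poll() is None:
            if time.time() > deadline:
                proc.kill()
                return "TIMEOUT"
            time.sleep(1)
        return "OK" if proc.returncode == 0 else "FAIL"
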
def run_job_item(job_item, max_runtime):
    items_log.info("running job item " + str(job_item))
    local_file_path = os.path.abspath(job_item['input_filename'])
    job_item_status = "ERROR"
    try:
        if zipper.is_zip_file(local_file_path):
            job_item_status = process_zip_file(job_item, local_file_path, max_runtime)
        else:
            process_non_zip_file(job_item, max_runtime)
            items_log.info("Job Item Succeeded")
            job_item_status = "COMPLETE"
    except MaxRuntimeException as e:
        job_item_status = 'TIMEOUT'
        items_log.error("Job Item Timeout " + str(e) + traceback.format_exc())
    except Exception as e:
        items_log.error("Job Item Error " + traceback.format_exc() + "\n" + str(e))
    finally:
        items_log.info("Job_Item Status: " + job_item_status)
        job_item['status_id'] = get_job_item_status_id(job_item_status)
        # job_item['status_id'] = get_status_id_with_retry(job_item)
        save_job_item(job_item)
    return job_item_status

def process_zip_file(job_item, zip_file_path, max_runtime):
    """ Unzip compressed file
        Create new job_item record(s)
        Upload new uncompressed file(s) to s3 """
    output_dir = os.path.dirname(zip_file_path)
    zip_archive = zipfile.ZipFile(zip_file_path, "r")
    filenames = zip_archive.namelist()
    if len(filenames) > 1:
        items_log.info("found more than 1 file inside .zip")
        output_dir = os.path.join(
            output_dir, zip_file_path[:zip_file_path.find(zipper.ZIP_FILE_EXT)])
        zipper.expand_zip_archive(zip_archive, output_dir)
        zip_archive.close()
        create_job_items_from_directory(job_item, output_dir)
        shutil.rmtree(output_dir)
        status = "COMPLETE"
    else:
        items_log.info("found only 1 file inside .zip")
        filename = filenames[0]
        file_path = os.path.join(output_dir, filename)
        zipper.extract_file_from_archive(zip_archive, filename, file_path)
        zip_archive.close()
        job_item['input_filename'] = filename
        job_item['output_filename'] = filename + OUTPUT_FILE_SUFFIXES[job_item['plugin']]
        items_log.info("New output filename: " + job_item['output_filename'])
        runtimes = PLUGINS[job_item['plugin']]['runtime']
        max_runtime = timeout.get_timeout_from_file(file_path,
                                                    runtimes['bytes_per_sec'],
                                                    runtimes['max'],
                                                    runtimes['min'])
        status = run_job_item(job_item, max_runtime)
    os.remove(zip_file_path)
    return status

def upload_file(file_key, file_path, bucket_name):
    items_log.info("Uploading file: %s" % file_key)
    k = Key(get_bucket(get_connection(), bucket_name))
    k.key = file_key
    k.set_contents_from_filename(file_path)
    items_log.info("Upload complete!")

def download_file(file_key, file_path, bucket_name):
    items_log.info("Downloading file: %s" % file_key)
    k = Key(get_bucket(get_connection(), bucket_name))
    k.key = file_key
    k.get_contents_to_filename(file_path)
    items_log.info("Download complete!")

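# A minimal usage sketch of the two S3 helpers, assuming boto credentials are
# configured in the environment. The object key and local paths below are
# hypothetical placeholders, not values from this codebase.
upload_file("neuron_01.tif", "/tmp/neuron_01.tif", S3_WORKING_INPUT_BUCKET)
download_file("neuron_01.tif", "/tmp/neuron_01_copy.tif", S3_WORKING_INPUT_BUCKET)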