def run_on_cluster(to_cluster): if to_cluster: P.start_session() try: yield finally: P.close_session() else: yield
def tearDown(self): TestExecutionRunLocal.tearDown(self) P.close_session()
def main(argv=sys.argv): TASKS = {} for label, collection in [("tool", map_tool_to_runner), ("metric", map_metric_to_runner), ("collate", map_collate_to_runner), ("split", map_split_to_runner)]: for key, f in list(collection.items()): k = "{}_{}".format(label, key) if k in TASKS: raise ValueError("duplicate keys in TASK: {} {} {}".format( k, TASKS[k], f)) TASKS[k] = f parser = E.OptionParser(version="%prog version: $Id$", usage=globals()["__doc__"]) parser.add_option("-i", "--input-file", dest="input_files", type="string", action="append", help="input file. Can be used more than once [%default]") parser.add_option( "-s", "--input-slot", dest="input_slots", type="string", action="append", help= "input slot. Must be used as often as input_files for tools [%default]" ) parser.add_option( "-o", "--output-file", dest="output_files", type="string", action="append", help="output file. Can be used more than once [%default]") parser.add_option( "-n", "--dry-run", dest="dry_run", action="store_true", help="show statement to be executed, do not execute [%default]") parser.add_option("--engine", dest="engine", type="choice", choices=("local", "arvados"), help="engine to use [%default]") parser.add_option("-t", "--task", dest="task", type="choice", choices=sorted(TASKS.keys()), help="task to run [%default]") parser.add_option("-l", "--list-tasks", dest="list_tasks", action="store_true", help="list all available tasks and exit [%default]") parser.add_option("--always-mount", dest="always_mount", action="store_true", help="force mounting of arvados keep [%default]") parser.set_defaults( input_files=[], input_slots=[], output_files=[], engine="local", dry_run=False, task=None, always_mount=False, ) (options, args) = E.start(parser, argv, add_cluster_options=True) if options.list_tasks: options.stdout.write("available_tasks\n{}\n".format("\n".join( sorted(TASKS.keys())))) E.stop() return if len(options.input_files) == 0: raise ValueError("no input files specified, use --input-file") if len(options.output_files) == 0: raise ValueError("no output files specified, use --output-file") if options.task is None: raise ValueError("please specify a task to run (--task)") P.get_parameters() if options.engine == "arvados": raise ValueError("arvados support disabled") # crunch_json = Arvados.build_crunch_script(argv) crunch_json = None retval = E.run('arv-crunch-job --job="$(cat {})"'.format(crunch_json)) if retval != 0: raise ValueError("error while executing") os.unlink(crunch_json) E.stop() return retval # Start SGE session if not options.without_cluster: P.start_session() params = dict(parse_args(args)) signal.signal(signal.SIGINT, cleanup) # redirect all mount points in parameters and input files. mountpoint = redirect2mounts([params, options.input_files], always_mount=options.always_mount) mountpoint = redirect_defaults2mountpoint(mountpoint) # register mountpoint with pipeline P.PARAMS["mount_point"] = mountpoint P.PARAMS["dryrun"] = options.dry_run try: # instantiate task runner runner = TASKS[options.task](**params) if len(options.output_files) == 0: tmpfile = tempfile.NamedTemporaryFile(delete=False) os.unlink(tmpfile.name) options.output_files.append(tmpfile.name) if options.task.startswith("tool"): if len(options.input_slots) != len(options.input_files): raise ValueError( "for tools, provide the same number as input slots as there" "are input files (--input-slots)") input_files = dict(zip(options.input_slots, options.input_files)) runner.register_input(input_files) # check if expected is in params runner(list(input_files.values()), options.output_files[0]) elif options.task.startswith("metric"): runner(options.input_files[0], options.output_files[0]) elif options.task.startswith("collate"): runner(options.input_files, options.output_files[0]) elif options.task.startswith("split"): runner(options.input_files[0], options.output_files) # stop SGE session P.close_session() finally: cleanup() E.stop()