def task_runner(task_name, log_file, argv):
    """Run a task via run_with_retcodes, logging to log_file, and return a JSON
    string containing a return code and a human-readable message."""
    task_message = {
        0: "Run succeeded",
        1: "Unknown exception",
        2: "Missing data",
        3: "Task failed",
        4: "A duplicate task is already running",
        5: "Task scheduling failed",
        6: "Task was not run",
        98: "Other error",
        99: "Unknown error"
    }

    # Redirect stdout/stderr to the log file for the duration of the run.
    log_fd = open(log_file, 'a')
    sys.stdout = log_fd
    sys.stderr = log_fd

    # Timestamp the invocation in the log as DD/MM/YYYY HH:MM:SS.
    now = time.time()
    year, month, day, hh, mm, ss, _, _, _ = time.localtime(now)
    ts = "%02d/%02d/%04d %02d:%02d:%02d" % (day, month, year, hh, mm, ss)
    print >> log_fd, ts, task_name, argv

    try:
        ret_code = int(run_with_retcodes(argv))
        ret_msg = task_message.get(ret_code, task_message[99])
    except Exception as e:
        ret_code = 98
        ret_msg = str(e)

    return json.dumps({"code": ret_code, "message": ret_msg})
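
# Hedged usage sketch (not part of the original module): the task name "MyTask"
# and the log path below are illustrative assumptions.  Since task_runner returns
# a JSON string, a caller could decode it and branch on the return code:
#
#   result = json.loads(task_runner('MyTask',
#                                   '/tmp/mytask.log',
#                                   ['MyTask', '--local-scheduler']))
#   if result['code'] != 0:
#       print >> sys.stderr, "task failed:", result['message']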
def luigi_run(argv=sys.argv[1:]):
    run_with_retcodes(argv)
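
# Hedged sketch: a thin wrapper like luigi_run is typically exposed as a
# console-script entry point so a command line forwards argv straight to
# run_with_retcodes.  The entry-point name and module path below are assumptions,
# not taken from this repo's setup.py:
#
#   entry_points={
#       'console_scripts': ['my-luigi = mypackage.mymodule:luigi_run'],
#   }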
def apply(input_bundle, output_bundle, pipe_params, pipe_cls, input_tags,
          output_tags, force, output_bundle_uuid=None, sysexit=True):
    """
    Given an input bundle, run the pipeline on the bundle.

    Note, we first make a copy of all tasks that are parameterized identically to
    the tasks we will run.  This is so we can figure out what we will need to
    re-run.  This is why we make a single uuid for the output bundle of apply
    (for the driver).

    Args:
        input_bundle: The bundle with the data to be processed
        output_bundle: The new bundle to be created
        pipe_params: Dict of zero or more bundles to be handed to each instance of the task
        pipe_cls: String <module.ClassName>
        input_tags (dict): Tags used to find the input bundle
        output_tags (dict): Tags that need to be placed on the output bundle
        force (bool): Force re-computation of dependencies (re-run this pipe)
        output_bundle_uuid (str): Optionally specify exactly the UUID of the output bundle IFF we actually need to produce it
        sysexit (bool): Run with sys exit return codes (will raise SystemExit), else run internally

    Returns:
        None
    """
    _logger.debug("driver {}".format(driver.DriverTask))
    _logger.debug("pipe_cls {}".format(pipe_cls))
    _logger.debug("pipe params: {}".format(pipe_params))
    _logger.debug("force: {}".format(force))
    _logger.debug("input tags: {}".format(input_tags))
    _logger.debug("output tags: {}".format(output_tags))
    _logger.debug("sys.path {}".format(sys.path))

    args = [driver.DriverTask.task_family,
            '--local-scheduler',
            '--input-bundle', input_bundle,
            '--output-bundle', output_bundle,
            '--param-bundles', pipe_params,
            '--pipe-cls', pipe_cls,
            '--input-tags', json.dumps(input_tags),
            '--output-tags', json.dumps(output_tags)]

    if force:
        args += ['--force']

    ## Re-execute logic -- make a copy of the task DAG here.
    # Creates a cache of {pipe: path_cache_entry} in the pipesFS object.
    # This is used throughout execution to find / name the output bundles.
    reexecute_dag = driver.DriverTask(input_bundle, output_bundle,
                                      pipe_params, pipe_cls,
                                      input_tags, output_tags, force)

    resolve_workflow_bundles(reexecute_dag)

    # At this point the path cache should be full of existing or new UUIDs.
    # We are going to replace the final pipe's UUID if the user has passed one in.
    # This happens when we run the docker container.
    # TODO: don't replace if it already exists.
    if output_bundle_uuid is not None:
        users_root_task = reexecute_dag.deps()[0]
        pfs = fs.DisdatFS()
        pce = pfs.get_path_cache(users_root_task)
        if pce.rerun:  # if we have to re-run, replace it with our UUID
            # TODO: this is the same code as new_output_hframe, FIX!!!
            dir, uuid, _ = pfs._curr_context.make_managed_path(output_bundle_uuid)
            fs.DisdatFS.put_path_cache(users_root_task,
                                       uuid,
                                       dir,
                                       pce.rerun,
                                       pce.is_left_edge_task,
                                       overwrite=True)

    if False:  # Debugging aid: dump information about the root of the re-execute DAG.
        test = reexecute_dag
        print "----START DAG TASK---"
        print "task_id is {}".format(test.task_id)
        print "task_family is {}".format(test.task_family)
        print " class {}".format(test.__class__)
        print " module {}".format(test.__module__)
        print " inspect getfile(test) {}".format(inspect.getfile(test.__class__))
        print "resolve_bundles requires {}".format(fs.DisdatFS.task_path_cache)
        print "----END DAG TASK---"

    # This is a superior way of calling the task, because we can make it once
    # and not have to repeat the args into a 'fake' CLI call.
    if sysexit:
        retcodes.run_with_retcodes(args)
    else:
        build([reexecute_dag], local_scheduler=True)

    # After running a pipeline, blow away our path cache.  Needed if we're run
    # twice in the same process.  Probably not needed if you're using sysexit.
    fs.DisdatFS().clear_path_cache()
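
# Hedged usage sketch: calling apply() directly from Python.  The bundle names,
# tags, and pipe class below are illustrative assumptions.  With sysexit=False the
# DAG runs via build([...], local_scheduler=True) and control returns to the
# caller; with sysexit=True, run_with_retcodes raises SystemExit carrying Luigi's
# return code.
#
#   apply(input_bundle='raw.data',
#         output_bundle='clean.data',
#         pipe_params='{}',
#         pipe_cls='mymodule.CleanPipe',
#         input_tags={},
#         output_tags={'stage': 'clean'},
#         force=False,
#         sysexit=False)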