def _job_failure_hint(ctx, resource_id_list):
    """Return the log message that points the user at the failing job's logs.

    The hint names the first resource of the job. When there is no first
    resource, or it (or one of its fields) cannot be looked up, a generic
    message is returned instead so that error reporting itself cannot raise.
    """
    try:
        resource = ctx.id_map[resource_id_list[0]]
        return "An error occurred, see `cortex logs -v {} {}` for more details.".format(
            resource["resource_type"], resource["name"]
        )
    except LookupError:
        # IndexError (empty id list) or KeyError (unknown id / missing field).
        return "An error occurred, see the logs for more details."


def run_job(args):
    """Run the Spark workload described by ``args``.

    Parses ``args`` with ``parse_args``, builds a ``Context`` from
    ``args.context``, ``args.cache_dir`` and ``args.workload_id``, then calls
    the ingest, aggregator, transformer-validation and training-dataset steps
    in that order. Aggregators and transformer validation are skipped when
    their column lists are empty.

    Any failure is logged and terminates the process via ``sys.exit(1)``.
    The Spark session, if one was created, is always stopped.
    """
    (
        should_ingest,
        cols_to_validate,
        cols_to_aggregate,
        cols_to_transform,
        training_datasets,
    ) = parse_args(args)

    resource_id_list = (
        cols_to_validate + cols_to_aggregate + cols_to_transform + training_datasets
    )

    try:
        ctx = Context(
            s3_path=args.context,
            cache_dir=args.cache_dir,
            workload_id=args.workload_id,
        )
    except Exception:
        logger.exception("An error occurred, see the logs for more details.")
        sys.exit(1)

    # Stays None if session creation fails, so the finally clause can skip stop().
    spark = None
    try:
        spark = get_spark_session(ctx.workload_id)
        # Test that executors are allocated.
        spark.sparkContext.parallelize([1, 2, 3, 4, 5]).count()

        raw_df = ingest_raw_dataset(spark, ctx, cols_to_validate, should_ingest)

        if cols_to_aggregate:
            run_aggregators(spark, ctx, cols_to_aggregate, raw_df)

        if cols_to_transform:
            validate_transformers(spark, ctx, cols_to_transform, raw_df)

        create_training_datasets(spark, ctx, training_datasets, raw_df)

        util.log_job_finished(ctx.workload_id)
    except CortexException as e:
        e.wrap("error")
        logger.error(str(e))
        logger.exception(_job_failure_hint(ctx, resource_id_list))
        sys.exit(1)
    except Exception:
        logger.exception(_job_failure_hint(ctx, resource_id_list))
        sys.exit(1)
    finally:
        if spark is not None:
            spark.stop()
def train(args):
    """Train the model identified by ``args.model`` and upload its export.

    Builds a ``Context`` from ``args.context``, ``args.cache_dir`` and
    ``args.workload_id``, installs the context's Python packages, then trains
    inside a temporary directory under ``ctx.cache_dir``. The success status
    is uploaded right after training returns; the ``export/estimator``
    directory is then zipped and uploaded to ``model["key"]`` in
    ``ctx.bucket``.

    On any exception inside the training/upload section the failed status is
    uploaded, the error is logged, and the process exits via ``sys.exit(1)``.
    """
    ctx = Context(
        s3_path=args.context,
        cache_dir=args.cache_dir,
        workload_id=args.workload_id,
    )
    package.install_packages(ctx.python_packages, ctx.bucket)
    model = ctx.models_id_map[args.model]
    logger.info("Training")

    def log_failure_and_exit():
        # Only call this from inside an `except` block: logger.exception
        # records the traceback of the exception currently being handled.
        hint = "An error occurred, see `cx logs model {}` for more details."
        logger.exception(hint.format(model["name"]))
        sys.exit(1)

    with util.Tempdir(ctx.cache_dir) as workdir:
        model_dir = os.path.join(workdir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            implementation = ctx.get_model_impl(model["name"])
            train_util.train(model["name"], implementation, ctx, model_dir)
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])
            export_dir = os.path.join(model_dir, "export", "estimator")
            archive_path = os.path.join(workdir, "model.zip")
            util.zip_dir(export_dir, archive_path)
            aws.upload_file_to_s3(
                local_path=archive_path,
                key=model["key"],
                bucket=ctx.bucket,
            )

            util.log_job_finished(ctx.workload_id)
        except CortexException as err:
            ctx.upload_resource_status_failed(model)
            err.wrap("error")
            logger.error(str(err))
            log_failure_and_exit()
        except Exception:
            ctx.upload_resource_status_failed(model)
            log_failure_and_exit()
def build(args):
    """Build the Python packages listed in ``args.python_packages``.

    ``args.python_packages`` is a comma-separated string of ids, each looked
    up in ``ctx.pp_id_map``. The start status is uploaded for all of them
    before ``build_packages`` is called with a name-to-package mapping.

    If building (or logging job completion) raises, the exception is logged,
    the failed status is uploaded, and the function returns normally without
    re-raising. Otherwise the success status is uploaded.
    """
    ctx = Context(
        s3_path=args.context,
        cache_dir=args.cache_dir,
        workload_id=args.workload_id,
    )

    requested_ids = args.python_packages.split(",")
    python_packages_list = [ctx.pp_id_map[package_id] for package_id in requested_ids]

    # Index the packages by name; a later duplicate name overwrites an earlier one.
    packages_by_name = {}
    for package_resource in python_packages_list:
        packages_by_name[package_resource["name"]] = package_resource

    ctx.upload_resource_status_start(*python_packages_list)

    try:
        build_packages(packages_by_name, ctx.bucket)
        util.log_job_finished(ctx.workload_id)
    except Exception as err:
        logger.exception(err)
        ctx.upload_resource_status_failed(*python_packages_list)
        return

    ctx.upload_resource_status_success(*python_packages_list)