def train(args):
    """Train the model named by ``args.model`` and upload its exported estimator.

    Steps, in order:
      1. Build a ``Context`` from ``args.context``, ``args.cache_dir`` and
         ``args.workload_id``, then install the context's Python packages.
      2. Look the model up in ``ctx.models_id_map`` and mark its resource
         status as started.
      3. Run ``train_util.train`` into ``<tempdir>/model_dir`` and mark the
         resource status as successful.
      4. Zip ``<model_dir>/export/estimator`` to ``<tempdir>/model.zip``,
         upload it to ``model["key"]`` and log that the job finished.

    Any ``Exception`` raised in steps 3-4 marks the resource status as failed,
    logs the traceback and terminates the process with exit status 1. A
    ``CortexException`` is additionally wrapped with ``"error"`` and logged
    via ``logger.error`` before the traceback is logged.

    Args:
        args: Object exposing ``context``, ``cache_dir``, ``workload_id`` and
            ``model`` attributes.
    """
    ctx = Context(
        s3_path=args.context,
        cache_dir=args.cache_dir,
        workload_id=args.workload_id,
    )
    package.install_packages(ctx.python_packages, ctx.storage)
    model = ctx.models_id_map[args.model]

    def failure_message():
        # Built lazily so ``model["name"]`` is only looked up while an error
        # is actually being reported.
        return "An error occurred, see `cortex logs -v model {}` for more details.".format(
            model["name"]
        )

    logger.info("Training")

    with util.Tempdir(ctx.cache_dir) as workdir:
        model_dir = os.path.join(workdir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            # Only the first element of the returned pair is needed here.
            estimator_impl, _ = ctx.get_estimator_impl(model["name"])
            train_util.train(model["name"], estimator_impl, ctx, model_dir)
            # NOTE(review): success is recorded before the archive below is
            # uploaded; a failure while caching then flips the status to
            # failed. Confirm this ordering is intended.
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])
            zip_path = os.path.join(workdir, "model.zip")
            export_dir = os.path.join(model_dir, "export", "estimator")
            util.zip_dir(export_dir, zip_path)
            ctx.storage.upload_file(zip_path, model["key"])

            util.log_job_finished(ctx.workload_id)
        except CortexException as e:
            ctx.upload_resource_status_failed(model)
            e.wrap("error")
            logger.error(str(e))
            logger.exception(failure_message())
            sys.exit(1)
        except Exception:
            ctx.upload_resource_status_failed(model)
            logger.exception(failure_message())
            sys.exit(1)
def zip_and_upload(self, local_path, key):
    """Zip ``local_path`` and upload the archive to ``self.bucket`` under ``key``.

    The archive is built inside a private temporary directory instead of a
    fixed ``temp.zip`` in the current working directory. This keeps
    overlapping calls from overwriting each other's archive, does not depend
    on the working directory being writable, and guarantees the archive is
    removed even when zipping or uploading raises.

    Args:
        local_path: Source path handed to ``util.zip_dir``.
        key: Destination key handed to ``self.s3.upload_file``.

    Raises:
        Whatever ``util.zip_dir`` or ``self.s3.upload_file`` raise; the
        temporary archive is cleaned up before the exception propagates.
    """
    # Function-scope imports keep this block self-contained regardless of
    # what the enclosing module already imports.
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        zip_path = os.path.join(scratch_dir, "temp.zip")
        util.zip_dir(local_path, zip_path)
        # presumably a boto3 S3 client (filename, bucket, key) — verify against __init__
        self.s3.upload_file(zip_path, self.bucket, key)