示例#1
0
文件: train.py 项目: kwecht/cortex
def train(args):
    ctx = Context(s3_path=args.context,
                  cache_dir=args.cache_dir,
                  workload_id=args.workload_id)

    package.install_packages(ctx.python_packages, ctx.storage)

    model = ctx.models_id_map[args.model]

    logger.info("Training")

    with util.Tempdir(ctx.cache_dir) as temp_dir:
        model_dir = os.path.join(temp_dir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            estimator_impl, _ = ctx.get_estimator_impl(model["name"])
            train_util.train(model["name"], estimator_impl, ctx, model_dir)
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])
            model_export_dir = os.path.join(model_dir, "export", "estimator")
            model_zip_path = os.path.join(temp_dir, "model.zip")
            util.zip_dir(model_export_dir, model_zip_path)

            ctx.storage.upload_file(model_zip_path, model["key"])
            util.log_job_finished(ctx.workload_id)

        except CortexException as e:
            ctx.upload_resource_status_failed(model)
            e.wrap("error")
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cortex logs -v model {}` for more details."
                .format(model["name"]))
            sys.exit(1)
        except Exception as e:
            ctx.upload_resource_status_failed(model)
            logger.exception(
                "An error occurred, see `cortex logs -v model {}` for more details."
                .format(model["name"]))
            sys.exit(1)
示例#2
0
文件: s3.py 项目: kwecht/cortex
 def zip_and_upload(self, local_path, key):
     util.zip_dir(local_path, "temp.zip")
     self.s3.upload_file("temp.zip", self.bucket, key)
     util.rm_file("temp.zip")