def RunSetupAndUpload(packages, staging_bucket, package_path, job_name):
  """Runs setup.py and uploads the resulting tar.gz archives.

  Copies the source directory to a temporary directory and uses _RunSetup
  (which runs setuptools.sandbox.run_setup) to generate or run setup.py from
  the temporary directory. Uploads the resulting tar.gz archives and any
  extra from package_path.

  Args:
    packages: [str]. Path to extra tar.gz packages to upload.
    staging_bucket: storage_util.BucketReference. Bucket to which archives
      are uploaded.
    package_path: str. Relative path to source directory to be built.
    job_name: str. Name of the Cloud ML Job. Used to prefix uploaded packages.

  Returns:
    [str]. Fully qualified gcs paths from uploaded packages.

  Raises:
    ValueError: If packages is empty, and building package_path produces no
      tar archives.
    ArgumentError: if no packages were found in the given path.
  """
  def _MakePairs(paths):
    """Return tuples corresponding to the files and their upload paths."""
    return [(path, os.path.basename(path)) for path in paths]

  if package_path:
    with files.TemporaryDirectory() as temp_dir:
      setup_dir, package_name = os.path.split(
          os.path.abspath(package_path))
      dest_dir = os.path.join(temp_dir, 'dest')
      # Fixed message: the two implicitly-concatenated literals previously
      # joined as 'from[...]' with no separating space.
      log.debug(
          ('Copying local source tree from '
           '[{setup_dir}] to [{temp_dir}]').format(setup_dir=setup_dir,
                                                   temp_dir=dest_dir))
      # Build from a copy so running setup.py never mutates (or litters
      # build artifacts into) the user's source tree.
      shutil.copytree(setup_dir, dest_dir)
      package_paths = _RunSetup(dest_dir, package_name) + packages
      if not package_paths:
        raise flags.ArgumentError(_NO_PACKAGES_ERROR_MSG)
      # Must upload before the `with` block exits: the built archives live
      # inside the temporary directory.
      return uploads.UploadFiles(_MakePairs(package_paths),
                                 staging_bucket, job_name)
  else:
    # Nothing to build; the caller must have supplied pre-built packages.
    if not packages:
      raise flags.ArgumentError(_NO_PACKAGES_ERROR_MSG)
    return uploads.UploadFiles(_MakePairs(packages), staging_bucket, job_name)
def UploadPythonPackages(packages=(), package_path=None, staging_location=None):
  """Uploads Python packages (if necessary), building them as-specified.

  A Cloud ML Engine job needs one or more Python packages to run. They may be
  given in three ways, which can be combined:

  1. A path to a local, pre-built Python package file — uploaded to Cloud
     Storage and its resulting path returned.
  2. A path to a Cloud Storage-hosted, pre-built package (`gs://...`) —
     passed through untouched.
  3. A local Python source tree (the `--package-path` flag) — built with
     setuptools (see `BuildPackages`), and the resulting artifacts uploaded.

  Uploads go to a specially-prefixed location in the user-provided staging
  location: given `gs://my-bucket/`, a file `package.tar.gz` lands at
  `gs://my-bucket/<job name>/<checksum>/package.tar.gz`. setuptools must be
  installed on the local user system for case 3.

  Args:
    packages: list of str. Path to extra tar.gz packages to upload, if any. If
      empty, a package_path must be provided.
    package_path: str. Relative path to source directory to be built, if any.
      If omitted, one or more packages must be provided.
    staging_location: storage_util.ObjectReference. Cloud Storage prefix to
      which archives are uploaded. Not necessary if only remote packages are
      given.

  Returns:
    list of str. Fully qualified Cloud Storage URLs (`gs://..`) from uploaded
      packages.

  Raises:
    ValueError: If packages is empty, and building package_path produces no
      tar archives.
    SetuptoolsFailedError: If the setup.py file fails to successfully build.
    MissingInitError: If the package doesn't contain an `__init__.py` file.
    DuplicateEntriesError: If multiple files with the same name were provided.
    ArgumentError: if no packages were found in the given path or
      no staging_location was but uploads were required.
  """
  # Partition the given packages: gs:// URLs pass through, local files must
  # be uploaded.
  remote_paths = []
  local_paths = []
  for package in packages:
    bucket = (remote_paths
              if storage_util.ObjectReference.IsStorageUrl(package)
              else local_paths)
    bucket.append(package)

  if package_path:
    package_root = os.path.dirname(os.path.abspath(package_path))
    with _TempDirOrBackup(package_root) as working_dir:
      built = BuildPackages(package_path,
                            os.path.join(working_dir, 'output'))
      local_paths.extend(built)
      # The upload must happen inside this `with`: the built archives live
      # in the temporary directory and vanish when it is cleaned up.
      remote_paths.extend(_UploadFilesByPath(local_paths, staging_location))
  elif local_paths:
    # Can't combine this with above because above requires the temporary
    # directory to still be around
    remote_paths.extend(_UploadFilesByPath(local_paths, staging_location))

  if not remote_paths:
    raise flags.ArgumentError(_NO_PACKAGES_ERROR_MSG)
  return remote_paths
def Run(self, args):
  """This is what gets called when the user runs this command.

  Args:
    args: an argparse namespace. All the arguments that were provided to this
      command invocation.

  Returns:
    Some value that we want to have printed later.
  """
  region = properties.VALUES.compute.region.Get(required=True)
  staging_location = jobs_prep.GetStagingLocation(
      staging_bucket=args.staging_bucket, job_id=args.job,
      job_dir=args.job_dir)
  try:
    uris = jobs_prep.UploadPythonPackages(
        packages=args.packages, package_path=args.package_path,
        staging_location=staging_location)
  except jobs_prep.NoStagingLocationError:
    # Translate the low-level error into actionable flag guidance.
    raise flags.ArgumentError(
        'If local packages are provided, the `--staging-bucket` or '
        '`--job-dir` flag must be given.')
  log.debug('Using {0} as trainer uris'.format(uris))
  scale_tier_enum = (jobs.GetMessagesModule(
  ).GoogleCloudMlV1beta1TrainingInput.ScaleTierValueValuesEnum)
  scale_tier = scale_tier_enum(args.scale_tier) if args.scale_tier else None
  job = jobs.BuildTrainingJob(
      path=args.config,
      module_name=args.module_name,
      job_name=args.job,
      trainer_uri=uris,
      region=region,
      job_dir=args.job_dir.ToUrl() if args.job_dir else None,
      scale_tier=scale_tier,
      user_args=args.user_args,
      runtime_version=args.runtime_version)
  jobs_client = jobs.JobsClient()
  project_ref = resources.REGISTRY.Parse(
      properties.VALUES.core.project.Get(required=True),
      collection='ml.projects')
  job = jobs_client.Create(project_ref, job)
  log.status.Print('Job [{}] submitted successfully.'.format(job.jobId))
  # BUG FIX: the original read `args.async`, which is a syntax error on
  # Python 3.7+ (`async` became a reserved keyword). getattr() accesses the
  # identically-named attribute without using it as an identifier.
  if getattr(args, 'async'):
    log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId))
    return job

  # Synchronous mode: stream the job's logs until it finishes.
  log_fetcher = stream.LogFetcher(
      filters=log_utils.LogFilters(job.jobId),
      polling_interval=_POLLING_INTERVAL,
      continue_func=log_utils.MakeContinueFunction(job.jobId))
  printer = resource_printer.Printer(log_utils.LOG_FORMAT, out=log.err)

  def _CtrlCHandler(signal, frame):
    del signal, frame  # Unused
    raise KeyboardInterrupt

  with execution_utils.CtrlCSection(_CtrlCHandler):
    try:
      printer.Print(log_utils.SplitMultiline(
          log_fetcher.YieldLogs()))
    except KeyboardInterrupt:
      # Ctrl-C stops log streaming only; the job keeps running server-side.
      log.status.Print('Received keyboard interrupt.')
      log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId))
  job_ref = resources.REGISTRY.Parse(job.jobId,
                                     collection='ml.projects.jobs')
  # Re-fetch the job for its final state.
  job = jobs_client.Get(job_ref)
  # If the job itself failed, we will return a failure status.
  if job.state is not job.StateValueValuesEnum.SUCCEEDED:
    self.exit_code = 1
  return job
def _ValidateSubmitPredictionArgs(model_dir, version): if model_dir and version: raise flags.ArgumentError('`--version` cannot be set with `--model-dir`')
def SubmitTraining(jobs_client, job, job_dir=None, staging_bucket=None,
                   packages=None, package_path=None, scale_tier=None,
                   config=None, module_name=None, runtime_version=None,
                   stream_logs=None, user_args=None):
  """Submit a training job.

  Uploads/locates the job's Python packages, builds and creates the training
  job via the given client, and (if stream_logs) streams the job's logs to
  stderr until completion, re-fetching the job for its final state.

  Args:
    jobs_client: the jobs API client used to build, create, and fetch the job.
    job: str. Job ID to create; also used as the staging prefix.
    job_dir: object with .ToUrl(), or None. Cloud Storage job directory.
    staging_bucket: bucket reference, or None. Where packages are staged.
    packages: list of str, or None. Pre-built package paths (local or gs://).
    package_path: str, or None. Local source tree to build and upload.
    scale_tier: str, or None. Converted to the client's scale-tier enum.
    config: str, or None. Path to a job config file.
    module_name: str, or None. Python module to run.
    runtime_version: str, or None. ML runtime version.
    stream_logs: bool. If falsy, return immediately after creation; otherwise
      stream logs until the job finishes.
    user_args: list of str, or None. Extra args passed to the trainer.

  Returns:
    The created job resource (re-fetched after log streaming when
    stream_logs is set).

  Raises:
    flags.ArgumentError: if local packages are given but no staging location
      can be determined.
  """
  region = properties.VALUES.compute.region.Get(required=True)
  staging_location = jobs_prep.GetStagingLocation(
      staging_bucket=staging_bucket, job_id=job,
      job_dir=job_dir)
  try:
    uris = jobs_prep.UploadPythonPackages(
        packages=packages, package_path=package_path,
        staging_location=staging_location)
  except jobs_prep.NoStagingLocationError:
    # Surface the missing staging location as actionable flag guidance.
    raise flags.ArgumentError(
        'If local packages are provided, the `--staging-bucket` or '
        '`--job-dir` flag must be given.')
  log.debug('Using {0} as trainer uris'.format(uris))

  scale_tier_enum = jobs_client.training_input_class.ScaleTierValueValuesEnum
  scale_tier = scale_tier_enum(scale_tier) if scale_tier else None

  job = jobs_client.BuildTrainingJob(
      path=config,
      module_name=module_name,
      job_name=job,
      trainer_uri=uris,
      region=region,
      job_dir=job_dir.ToUrl() if job_dir else None,
      scale_tier=scale_tier,
      user_args=user_args,
      runtime_version=runtime_version)

  project_ref = resources.REGISTRY.Parse(
      properties.VALUES.core.project.Get(required=True),
      collection='ml.projects')
  job = jobs_client.Create(project_ref, job)
  if not stream_logs:
    PrintSubmitFollowUp(job.jobId, print_follow_up_message=True)
    return job
  else:
    PrintSubmitFollowUp(job.jobId, print_follow_up_message=False)

  # Stream logs to stderr until the job completes (or the user interrupts).
  log_fetcher = stream.LogFetcher(
      filters=log_utils.LogFilters(job.jobId),
      polling_interval=properties.VALUES.ml_engine.polling_interval.GetInt(),
      continue_interval=_CONTINUE_INTERVAL,
      continue_func=log_utils.MakeContinueFunction(job.jobId))
  printer = resource_printer.Printer(log_utils.LOG_FORMAT, out=log.err)

  with execution_utils.RaisesKeyboardInterrupt():
    try:
      printer.Print(log_utils.SplitMultiline(log_fetcher.YieldLogs()))
    except KeyboardInterrupt:
      # Ctrl-C stops streaming only; the job keeps running server-side.
      log.status.Print('Received keyboard interrupt.\n')
      log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId,
                                                 project=project_ref.Name()))
    except exceptions.HttpError as err:
      # Log-polling failure is non-fatal: report it and fall through to
      # fetch the job's current state.
      log.status.Print('Polling logs failed:\n{}\n'.format(str(err)))
      log.info('Failure details:', exc_info=True)
      log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId,
                                                 project=project_ref.Name()))

  job_ref = resources.REGISTRY.Parse(
      job.jobId,
      params={'projectsId': properties.VALUES.core.project.GetOrFail},
      collection='ml.projects.jobs')
  # Re-fetch so the caller sees the job's final (or current) state.
  job = jobs_client.Get(job_ref)

  return job