Example #1
def RunSetupAndUpload(packages, staging_bucket, package_path, job_name):
    """Runs setup.py and uploads the resulting tar.gz archives.

  Copies the source directory to a temporary directory and uses
  _RunSetup (which runs setuptools.sandbox.run_setup) to generate or run
  setup.py from the temporary directory. Uploads the resulting tar.gz
  archives and any extra from package_path.
  Args:
    packages: [str]. Path to extra tar.gz packages to upload.
    staging_bucket: storage_util.BucketReference. Bucket to which archives are
      uploaded.
    package_path: str. Relative path to source directory to be built.
    job_name: str. Name of the Cloud ML Job. Used to prefix uploaded packages.
  Returns:
      [str]. Fully qualified gcs paths from uploaded packages.
  Raises:
    ValueError: If packages is empty, and building package_path produces no
    tar archives.
    ArgumentError: if no packages were found in the given path.
  """
    def _MakePairs(paths):
        """Return tuples corresponding to the files and their upload paths."""
        return [(path, os.path.basename(path)) for path in paths]

    if package_path:
        with files.TemporaryDirectory() as temp_dir:
            setup_dir, package_name = os.path.split(
                os.path.abspath(package_path))
            dest_dir = os.path.join(temp_dir, 'dest')
            log.debug(
                ('Copying local source tree from '
                 '[{setup_dir}] to [{temp_dir}]').format(setup_dir=setup_dir,
                                                         temp_dir=dest_dir))
            shutil.copytree(setup_dir, dest_dir)
            package_paths = _RunSetup(dest_dir, package_name) + packages
            if not package_paths:
                raise flags.ArgumentError(_NO_PACKAGES_ERROR_MSG)
            return uploads.UploadFiles(_MakePairs(package_paths),
                                       staging_bucket, job_name)
    else:
        if not packages:
            raise flags.ArgumentError(_NO_PACKAGES_ERROR_MSG)
        return uploads.UploadFiles(_MakePairs(packages), staging_bucket,
                                   job_name)
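
A minimal usage sketch for the function above, assuming the surrounding gcloud SDK modules are importable and that storage_util.BucketReference.FromBucketUrl is available in this SDK version; the bucket name, package list, and paths are all illustrative, not taken from the original source:

# Hypothetical call; every value below is illustrative.
bucket = storage_util.BucketReference.FromBucketUrl('gs://my-staging-bucket')
gcs_paths = RunSetupAndUpload(
    packages=['dist/extras-0.1.tar.gz'],  # pre-built extras uploaded as-is
    staging_bucket=bucket,
    package_path='trainer/',              # source tree containing setup.py
    job_name='my_training_job')
# gcs_paths now holds fully qualified gs:// URLs for each uploaded archive.
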
Example #2
def UploadPythonPackages(packages=(), package_path=None,
                         staging_location=None):
    """Uploads Python packages (if necessary), building them as-specified.

  A Cloud ML Engine job needs one or more Python packages to run. These Python
  packages can be specified in one of three ways:

    1. As a path to a local, pre-built Python package file.
    2. As a path to a Cloud Storage-hosted, pre-built Python package file (paths
       beginning with 'gs://').
    3. As a local Python source tree (the `--package-path` flag).

  In case 1, we upload the local files to Cloud Storage[1] and provide their
  paths. These can then be given to the Cloud ML Engine API, which can fetch
  these files.

  In case 2, we don't need to do anything. We can just send these paths directly
  to the Cloud ML Engine API.

  In case 3, we perform a build using setuptools[2], and upload the resulting
  artifacts to Cloud Storage[1]. The paths to these artifacts can be given to
  the Cloud ML Engine API. See the `BuildPackages` method.

  These methods of specifying Python packages may be combined.


  [1] Uploads are to a specially-prefixed location in a user-provided Cloud
  Storage staging bucket. If the user provides bucket `gs://my-bucket/`, a file
  `package.tar.gz` is uploaded to
  `gs://my-bucket/<job name>/<checksum>/package.tar.gz`.

  [2] setuptools must be installed on the local user system.

  Args:
    packages: list of str. Path to extra tar.gz packages to upload, if any. If
      empty, a package_path must be provided.
    package_path: str. Relative path to source directory to be built, if any. If
      omitted, one or more packages must be provided.
    staging_location: storage_util.ObjectReference. Cloud Storage prefix to
      which archives are uploaded. Not necessary if only remote packages are
      given.

  Returns:
    list of str. Fully qualified Cloud Storage URLs (`gs://..`) from uploaded
      packages.

  Raises:
    ValueError: If packages is empty, and building package_path produces no
      tar archives.
    SetuptoolsFailedError: If the setup.py file fails to successfully build.
    MissingInitError: If the package doesn't contain an `__init__.py` file.
    DuplicateEntriesError: If multiple files with the same name were provided.
    ArgumentError: if no packages were found in the given path or no
      staging_location was but uploads were required.
  """
    remote_paths = []
    local_paths = []
    for package in packages:
        if storage_util.ObjectReference.IsStorageUrl(package):
            remote_paths.append(package)
        else:
            local_paths.append(package)

    if package_path:
        package_root = os.path.dirname(os.path.abspath(package_path))
        with _TempDirOrBackup(package_root) as working_dir:
            local_paths.extend(
                BuildPackages(package_path,
                              os.path.join(working_dir, 'output')))
            remote_paths.extend(
                _UploadFilesByPath(local_paths, staging_location))
    elif local_paths:
        # This can't be combined with the branch above, because that branch
        # needs the temporary directory to still exist during the upload.
        remote_paths.extend(_UploadFilesByPath(local_paths, staging_location))

    if not remote_paths:
        raise flags.ArgumentError(_NO_PACKAGES_ERROR_MSG)
    return remote_paths
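
Sketched below is how the three cases from the docstring might be mixed in a single call. The URLs and paths are illustrative, and storage_util.ObjectReference.FromUrl is assumed to be the SDK's parser for gs:// object paths:

# Hypothetical call mixing all three package sources.
staging = storage_util.ObjectReference.FromUrl(
    'gs://my-staging-bucket/staging')      # assumed helper; illustrative URL
urls = UploadPythonPackages(
    packages=[
        'gs://my-bucket/prebuilt.tar.gz',  # case 2: already on Cloud Storage
        'dist/extras-0.1.tar.gz',          # case 1: local pre-built file
    ],
    package_path='trainer/',               # case 3: source tree to build
    staging_location=staging)
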
Example #3
    def Run(self, args):
        """This is what gets called when the user runs this command.

    Args:
      args: an argparse namespace. All the arguments that were provided to this
        command invocation.

    Returns:
      Some value that we want to have printed later.
    """
        region = properties.VALUES.compute.region.Get(required=True)
        staging_location = jobs_prep.GetStagingLocation(
            staging_bucket=args.staging_bucket,
            job_id=args.job,
            job_dir=args.job_dir)
        try:
            uris = jobs_prep.UploadPythonPackages(
                packages=args.packages,
                package_path=args.package_path,
                staging_location=staging_location)
        except jobs_prep.NoStagingLocationError:
            raise flags.ArgumentError(
                'If local packages are provided, the `--staging-bucket` or '
                '`--job-dir` flag must be given.')
        log.debug('Using {0} as trainer uris'.format(uris))

        scale_tier_enum = (
            jobs.GetMessagesModule()
            .GoogleCloudMlV1beta1TrainingInput.ScaleTierValueValuesEnum)
        scale_tier = (scale_tier_enum(args.scale_tier)
                      if args.scale_tier else None)
        job = jobs.BuildTrainingJob(
            path=args.config,
            module_name=args.module_name,
            job_name=args.job,
            trainer_uri=uris,
            region=region,
            job_dir=args.job_dir.ToUrl() if args.job_dir else None,
            scale_tier=scale_tier,
            user_args=args.user_args,
            runtime_version=args.runtime_version)

        jobs_client = jobs.JobsClient()
        project_ref = resources.REGISTRY.Parse(
            properties.VALUES.core.project.Get(required=True),
            collection='ml.projects')
        job = jobs_client.Create(project_ref, job)
        log.status.Print('Job [{}] submitted successfully.'.format(job.jobId))
        # `async` is a reserved word in Python 3, so the flag value can't be
        # read as a plain attribute.
        if getattr(args, 'async'):
            log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId))
            return job

        log_fetcher = stream.LogFetcher(
            filters=log_utils.LogFilters(job.jobId),
            polling_interval=_POLLING_INTERVAL,
            continue_func=log_utils.MakeContinueFunction(job.jobId))

        printer = resource_printer.Printer(log_utils.LOG_FORMAT, out=log.err)

        def _CtrlCHandler(signal, frame):
            del signal, frame  # Unused
            raise KeyboardInterrupt

        with execution_utils.CtrlCSection(_CtrlCHandler):
            try:
                printer.Print(log_utils.SplitMultiline(
                    log_fetcher.YieldLogs()))
            except KeyboardInterrupt:
                log.status.Print('Received keyboard interrupt.')
                log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId))

        job_ref = resources.REGISTRY.Parse(job.jobId,
                                           collection='ml.projects.jobs')
        job = jobs_client.Get(job_ref)
        # If the job itself failed, we will return a failure status.
        if job.state is not job.StateValueValuesEnum.SUCCEEDED:
            self.exit_code = 1

        return job
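
Run() is normally invoked by gcloud's calliope framework, which builds the argparse namespace from the parsed command line. A rough sketch of an equivalent manual invocation, with every attribute value illustrative and the command class name hypothetical (note the setattr workaround, since `async` is reserved in Python 3):

import argparse

# Hypothetical namespace; in practice calliope constructs this from the CLI.
args = argparse.Namespace(
    job='my_training_job', staging_bucket=None, job_dir=None,
    packages=[], package_path='trainer/', config=None,
    module_name='trainer.task', scale_tier=None,
    user_args=None, runtime_version='1.0')
setattr(args, 'async', True)  # reserved word, so set it dynamically
command = Train()  # hypothetical name for the command class defining Run()
job = command.Run(args)
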
Example #4
def _ValidateSubmitPredictionArgs(model_dir, version):
  if model_dir and version:
    raise flags.ArgumentError('`--version` cannot be set with `--model-dir`')
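
The guard above is easiest to see with two illustrative calls (the flag values are made up):

_ValidateSubmitPredictionArgs(model_dir=None, version='v1')
# -> returns None: only one of the two flags is set.
_ValidateSubmitPredictionArgs(model_dir='gs://bucket/model', version='v1')
# -> raises flags.ArgumentError: the flags are mutually exclusive.
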
Example #5
def SubmitTraining(jobs_client, job, job_dir=None, staging_bucket=None,
                   packages=None, package_path=None, scale_tier=None,
                   config=None, module_name=None, runtime_version=None,
                   stream_logs=None, user_args=None):
  """Submit a training job."""
  region = properties.VALUES.compute.region.Get(required=True)
  staging_location = jobs_prep.GetStagingLocation(
      staging_bucket=staging_bucket, job_id=job,
      job_dir=job_dir)
  try:
    uris = jobs_prep.UploadPythonPackages(
        packages=packages, package_path=package_path,
        staging_location=staging_location)
  except jobs_prep.NoStagingLocationError:
    raise flags.ArgumentError(
        'If local packages are provided, the `--staging-bucket` or '
        '`--job-dir` flag must be given.')
  log.debug('Using {0} as trainer uris'.format(uris))

  scale_tier_enum = jobs_client.training_input_class.ScaleTierValueValuesEnum
  scale_tier = scale_tier_enum(scale_tier) if scale_tier else None

  job = jobs_client.BuildTrainingJob(
      path=config,
      module_name=module_name,
      job_name=job,
      trainer_uri=uris,
      region=region,
      job_dir=job_dir.ToUrl() if job_dir else None,
      scale_tier=scale_tier,
      user_args=user_args,
      runtime_version=runtime_version)

  project_ref = resources.REGISTRY.Parse(
      properties.VALUES.core.project.Get(required=True),
      collection='ml.projects')
  job = jobs_client.Create(project_ref, job)
  if not stream_logs:
    PrintSubmitFollowUp(job.jobId, print_follow_up_message=True)
    return job
  else:
    PrintSubmitFollowUp(job.jobId, print_follow_up_message=False)

  log_fetcher = stream.LogFetcher(
      filters=log_utils.LogFilters(job.jobId),
      polling_interval=properties.VALUES.ml_engine.polling_interval.GetInt(),
      continue_interval=_CONTINUE_INTERVAL,
      continue_func=log_utils.MakeContinueFunction(job.jobId))

  printer = resource_printer.Printer(log_utils.LOG_FORMAT,
                                     out=log.err)
  with execution_utils.RaisesKeyboardInterrupt():
    try:
      printer.Print(log_utils.SplitMultiline(log_fetcher.YieldLogs()))
    except KeyboardInterrupt:
      log.status.Print('Received keyboard interrupt.\n')
      log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId,
                                                 project=project_ref.Name()))
    except exceptions.HttpError as err:
      log.status.Print('Polling logs failed:\n{}\n'.format(str(err)))
      log.info('Failure details:', exc_info=True)
      log.status.Print(_FOLLOW_UP_MESSAGE.format(job_id=job.jobId,
                                                 project=project_ref.Name()))

  job_ref = resources.REGISTRY.Parse(
      job.jobId,
      params={'projectsId': properties.VALUES.core.project.GetOrFail},
      collection='ml.projects.jobs')
  job = jobs_client.Get(job_ref)

  return job
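
A hedged sketch of calling this helper directly; the client construction mirrors jobs.JobsClient() from Example #3, and every flag value is an illustrative assumption rather than something taken from the original module:

# Hypothetical call; all values below are illustrative.
client = jobs.JobsClient()
bucket = storage_util.BucketReference.FromBucketUrl('gs://my-staging-bucket')
submitted = SubmitTraining(
    jobs_client=client,
    job='my_training_job',
    staging_bucket=bucket,
    package_path='trainer/',
    module_name='trainer.task',
    runtime_version='1.0',
    stream_logs=False)  # return immediately instead of tailing the job logs
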