示例#1
0
def copy_template(flags_dict: Dict[str, Any]) -> None:
    """Materialize the template selected in `flags_dict` at the destination.

    The template directory for the requested model is handed to
    `_copy_and_replace_placeholder_dir` together with the paths to ignore and
    the placeholder substitutions. Afterwards every additional file registered
    for the model in `_ADDITIONAL_FILE_PATHS` is placed under the destination:
    sources starting with `http://` or `https://` are downloaded, all others
    are copied from a path relative to `_tfx_src_dir()`.

    Args:
      flags_dict: Mapping that must contain the `labels.MODEL`,
        `labels.PIPELINE_NAME` and `labels.DESTINATION_PATH` entries.

    Raises:
      ValueError: If there is no template directory for the requested model.
    """
    model = flags_dict[labels.MODEL]
    pipeline_name = _sanitize_pipeline_name(flags_dict[labels.PIPELINE_NAME])

    template_dir = os.path.join(_templates_src_dir(), model)
    if not os.path.isdir(template_dir):
        raise ValueError('Model {} does not exist.'.format(model))

    destination_dir = flags_dict[labels.DESTINATION_PATH]

    # Paths to ignore are registered relative to the template directory.
    ignore_paths = set()
    for relative_path in _IGNORE_FILE_PATHS.get(model, []):
        ignore_paths.add(os.path.join(template_dir, relative_path))

    placeholders = {
        _IMPORT_FROM_PACKAGE: _IMPORT_FROM_LOCAL_DIR,
        _PLACEHOLDER_PIPELINE_NAME: pipeline_name,
    }
    _copy_and_replace_placeholder_dir(template_dir, destination_dir,
                                      ignore_paths, placeholders)

    for extra_file in _ADDITIONAL_FILE_PATHS.get(model, []):
        target_path = os.path.join(destination_dir, extra_file.dst)
        fileio.makedirs(os.path.dirname(target_path))

        is_remote = extra_file.src.startswith(('http://', 'https://'))
        if is_remote:
            urllib.request.urlretrieve(extra_file.src, target_path)
            continue

        local_source = os.path.join(_tfx_src_dir(), extra_file.src)
        fileio.copy(local_source, target_path)
示例#2
0
文件: io_utils.py 项目: jay90099/tfx
def copy_dir(
    src: str,
    dst: str,
    allow_regex_patterns: Iterable[str] = (),
    deny_regex_patterns: Iterable[str] = (),
) -> None:
  """Recursively copies a directory tree, optionally filtered by regexes.

  Any existing `dst` is removed first and then recreated, so the destination
  only contains what this call copies.

  Args:
    src: Source directory to copy from. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
    dst: Destination directory to copy to. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
    allow_regex_patterns: Optional allowlist of regular expressions. Each
        pattern is searched for in the full source path. When the list is
        non-empty, files and subdirectories whose path matches none of the
        patterns are not copied.
    deny_regex_patterns: Optional denylist of regular expressions. Each
        pattern is searched for in the full source path. Files and
        subdirectories whose path matches any of the patterns are not copied.
  """
  src = src.rstrip('/')
  dst = dst.rstrip('/')

  allowed = [re.compile(pattern) for pattern in allow_regex_patterns]
  denied = [re.compile(pattern) for pattern in deny_regex_patterns]

  def should_copy(path):
    # An empty allowlist means "allow everything".
    if allowed and not any(regex.search(path) for regex in allowed):
      return False
    return not any(regex.search(path) for regex in denied)

  if fileio.exists(dst):
    fileio.rmtree(dst)
  fileio.makedirs(dst)

  for current_dir, child_dirs, file_names in fileio.walk(src):
    target_dir = current_dir.replace(src, dst, 1)
    target_dir_ready = fileio.isdir(target_dir)

    for file_name in file_names:
      source_file = os.path.join(current_dir, file_name)
      if not should_copy(source_file):
        continue
      if not target_dir_ready:
        # The directory itself may have been filtered out even though one of
        # the files it contains passes the filters; create it on demand.
        fileio.makedirs(target_dir)
        target_dir_ready = True
      fileio.copy(source_file, os.path.join(target_dir, file_name))

    for child_dir in child_dirs:
      if should_copy(os.path.join(current_dir, child_dir)):
        fileio.makedirs(os.path.join(target_dir, child_dir))
示例#3
0
def copy_file(src: Text, dst: Text, overwrite: bool = False):
    """Copies one file to `dst`, creating the parent directory of `dst`.

    Args:
      src: Path of the file to copy.
      dst: Path the file is copied to.
      overwrite: When true, an existing `dst` is removed before copying; the
        flag is also forwarded to `fileio.copy`.
    """
    stale_target_present = overwrite and fileio.exists(dst)
    if stale_target_present:
        fileio.remove(dst)

    fileio.makedirs(os.path.dirname(dst))
    fileio.copy(src, dst, overwrite=overwrite)
def run_fn(fn_args: tfx.components.FnArgs):
    """Trains the Keras model and exports it as a TFJS model.

    The model is first saved as a SavedModel in a `temp` subdirectory of
    `fn_args.serving_model_dir`, rewritten into that serving directory with
    the TFJS rewriter, and the temporary directory is removed at the end.

    Args:
      fn_args: Holds args used to train the model as name/value pairs.
    """
    transform_output = tft.TFTransformOutput(fn_args.transform_output)

    training_data = _input_fn(fn_args.train_files,
                              fn_args.data_accessor,
                              transform_output,
                              batch_size=_TRAIN_BATCH_SIZE)
    evaluation_data = _input_fn(fn_args.eval_files,
                                fn_args.data_accessor,
                                transform_output,
                                batch_size=_EVAL_BATCH_SIZE)

    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        model = _build_keras_model()

    model.fit(training_data,
              steps_per_epoch=fn_args.train_steps,
              validation_data=evaluation_data,
              validation_steps=fn_args.eval_steps,
              verbose=2)

    # The serving signature takes two int64 vectors of unspecified length.
    inference_fn = _get_inference_fn(model, transform_output)
    serving_default = inference_fn.get_concrete_function(
        tf.TensorSpec(shape=[None],
                      dtype=tf.int64,
                      name=_CUR_PAGE_FEATURE_KEY),
        tf.TensorSpec(shape=[None],
                      dtype=tf.int64,
                      name=_SESSION_INDEX_FEATURE_KEY))
    signatures = {'serving_default': serving_default}

    # Stage the SavedModel in a temporary directory below the serving dir.
    staging_dir = os.path.join(fn_args.serving_model_dir, 'temp')
    model.save(staging_dir, save_format='tf', signatures=signatures)

    # Rewrite the staged SavedModel as a TFJS model in the final directory.
    tfjs_rewriter = rewriter_factory.create_rewriter(
        rewriter_factory.TFJS_REWRITER, name='tfjs_rewriter')
    converters.rewrite_saved_model(staging_dir, fn_args.serving_model_dir,
                                   tfjs_rewriter,
                                   rewriter.ModelType.TFJS_MODEL)

    # Ship the transform vocabulary next to the model. It is not part of the
    # original SavedModel because vocab lookups are currently not supported in
    # TFJS and are expected to be done independently by client code.
    vocabulary_source = transform_output.vocabulary_file_by_name(
        _VOCAB_FILENAME)
    vocabulary_target = os.path.join(fn_args.serving_model_dir,
                                     _VOCAB_FILENAME)
    fileio.copy(vocabulary_source, vocabulary_target)

    fileio.rmtree(staging_dir)
示例#5
0
 def side_effect(cmd, stdout, stderr):
     """Checks the `sdist` command line and drops a package into `dist`.

     Asserts that `cmd` is `[sys.executable, <setup file>, 'sdist']` and that
     stdout and stderr are the same object/value, then copies `test_file` to
     `<setup file dir>/dist/<expected_package>`.
     """
     self.assertLen(cmd, 3)
     self.assertEqual(sys.executable, cmd[0])
     self.assertEqual('sdist', cmd[2])
     self.assertEqual(stdout, stderr)

     # The output directory sits next to the setup file given in the command.
     output_dir = os.path.join(os.path.dirname(cmd[1]), 'dist')
     fileio.makedirs(output_dir)
     fileio.copy(test_file, os.path.join(output_dir, expected_package))
示例#6
0
文件: io_utils.py 项目: konny0311/tfx
def copy_dir(src: Text, dst: Text) -> None:
  """Recursively mirrors the directory `src` at `dst`.

  Any existing `dst` is removed first and then recreated, so the destination
  only contains what this call copies.

  Args:
    src: Source directory; trailing slashes are ignored.
    dst: Destination directory; trailing slashes are ignored.
  """
  src = src.rstrip('/')
  dst = dst.rstrip('/')

  if fileio.exists(dst):
    fileio.rmtree(dst)
  fileio.makedirs(dst)

  for current_dir, child_dirs, file_names in fileio.walk(src):
    # Swap the leading source prefix for the destination prefix.
    mirrored_dir = current_dir.replace(src, dst, 1)

    for file_name in file_names:
      fileio.copy(os.path.join(current_dir, file_name),
                  os.path.join(mirrored_dir, file_name))

    for child_dir in child_dirs:
      fileio.makedirs(os.path.join(mirrored_dir, child_dir))
示例#7
0
def package_user_module_file(instance_name: Text, module_path: Text,
                             pipeline_root: Text) -> Tuple[Text, Text]:
    """Package the given user module file into a Python Wheel package.

    Every `.py` file located in the same directory as the module is copied
    into a scratch build directory, an ephemeral setup file is written there
    and `bdist_wheel` is run on it. The single resulting wheel is copied to
    `<pipeline_root>/_wheels`. All scratch directories are removed before
    this function returns, whether or not the build succeeded.

    Args:
      instance_name: Name of the component instance, for creating a unique
        wheel package name.
      module_path: Path to the module file to be packaged.
      pipeline_root: Root path under which the `_wheels` staging directory is
        created.

    Returns:
      dist_file_path: Path to the generated wheel file.
      user_module_path: Path for referencing the user module when stored
        as the _MODULE_PATH_KEY execution property. Format should be treated
        as opaque by the user.

    Raises:
      ValueError: When the module path is not a ".py" file or does not exist.
      RuntimeError: When wheel building fails, or when the build does not
        produce exactly one output file.
    """
    module_path = os.path.abspath(io_utils.ensure_local(module_path))
    if not module_path.endswith('.py'):
        raise ValueError('Module path %r is not a ".py" file.' % module_path)
    if not os.path.exists(module_path):
        raise ValueError('Module path %r does not exist.' % module_path)

    user_module_dir, module_file_name = os.path.split(module_path)
    user_module_name = re.sub(r'\.py$', '', module_file_name)

    # Discover all Python source files in this directory for inclusion.
    source_files = [
        file_name for file_name in os.listdir(user_module_dir)
        if file_name.endswith('.py')
    ]
    # The setup file and package marker are shipped but are not user modules.
    module_names = [
        re.sub(r'\.py$', '', file_name) for file_name in source_files
        if file_name not in (_EPHEMERAL_SETUP_PY_FILE_NAME, '__init__.py')
    ]

    # mkdtemp() leaves cleanup to the caller, so every scratch directory
    # created here is removed in the `finally` clause below.
    build_dir = tempfile.mkdtemp()
    temp_dir = tempfile.mkdtemp()
    dist_dir = tempfile.mkdtemp()
    try:
        # Set up build directory.
        for source_file in source_files:
            shutil.copyfile(os.path.join(user_module_dir, source_file),
                            os.path.join(build_dir, source_file))

        # Generate an ephemeral wheel for this module.
        logging.info(
            'Generating ephemeral wheel package for %r (including modules: %s).',
            module_path, module_names)

        version_hash = _get_version_hash(user_module_dir, source_files)
        logging.info('User module package has hash fingerprint version %s.',
                     version_hash)

        setup_py_path = os.path.join(build_dir, _EPHEMERAL_SETUP_PY_FILE_NAME)
        with open(setup_py_path, 'w') as f:
            f.write(
                _get_ephemeral_setup_py_contents(
                    'tfx-user-code-%s' % instance_name,
                    '0.0+%s' % version_hash, module_names))

        bdist_command = [
            sys.executable, setup_py_path, 'bdist_wheel', '--bdist-dir',
            temp_dir, '--dist-dir', dist_dir
        ]
        logging.info('Executing: %s', bdist_command)
        try:
            subprocess.check_call(bdist_command, cwd=build_dir)
        except subprocess.CalledProcessError as e:
            raise RuntimeError('Failed to build wheel.') from e

        dist_files = os.listdir(dist_dir)
        if len(dist_files) != 1:
            raise RuntimeError(
                'Unexpectedly found %d output files in wheel output directory %s.'
                % (len(dist_files), dist_dir))
        build_dist_file_path = os.path.join(dist_dir, dist_files[0])

        # Copy wheel file atomically to wheel staging directory: write to a
        # temporary name first, then rename onto the final name.
        dist_wheel_directory = os.path.join(pipeline_root, '_wheels')
        dist_file_path = os.path.join(dist_wheel_directory, dist_files[0])
        temp_dist_file_path = dist_file_path + '.tmp'
        fileio.makedirs(dist_wheel_directory)
        fileio.copy(build_dist_file_path, temp_dist_file_path, overwrite=True)
        fileio.rename(temp_dist_file_path, dist_file_path, overwrite=True)
    finally:
        # Best-effort cleanup; a failure to delete scratch data must not mask
        # the build result or the original exception.
        for scratch_dir in (build_dir, temp_dir, dist_dir):
            shutil.rmtree(scratch_dir, ignore_errors=True)

    logging.info(
        ('Successfully built user code wheel distribution at %r; target user '
         'module is %r.'), dist_file_path, user_module_name)

    # Encode the user module key as a specification of a user module name within
    # a packaged wheel path.
    assert '@' not in user_module_name, ('Unexpected invalid module name: %s' %
                                         user_module_name)
    user_module_path = '%s@%s' % (user_module_name, dist_file_path)
    logging.info('Full user module path is %r', user_module_path)

    return dist_file_path, user_module_path
示例#8
0
文件: test_utils.py 项目: htahir1/tfx
 def Do(self, input_dict: Dict[Text, List[types.Artifact]],
        output_dict: Dict[Text, List[types.Artifact]],
        exec_properties: Dict[Text, Any]) -> None:
   """Copies the single 'input' artifact URI to the single 'output' URI.

   Args:
     input_dict: Input artifacts; the 'input' key is read.
     output_dict: Output artifacts; the 'output' key is read.
     exec_properties: Execution properties; not used here.
   """
   source_uri = artifact_utils.get_single_uri(input_dict['input'])
   target_uri = artifact_utils.get_single_uri(output_dict['output'])
   fileio.copy(source_uri, target_uri)