Пример #1
0
def compare_anomalies(output_uri: Text, expected_uri: Text) -> bool:
    """Compares anomalies files in output uri and recorded uri.

    Every file found while walking `expected_uri` is parsed as a serialized
    `Anomalies` message and compared with the file at the mirrored location
    under `output_uri`. Only the `anomaly_info` field is compared.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.

    Returns:
      True if `anomaly_info` matches for every recorded file, False as soon as
      one pair differs.
    """
    for dir_name, _, leaf_files in fileio.walk(expected_uri):
        # Mirror the recorded directory into the output tree. This relies on
        # `dir_name` starting with `expected_uri`.
        output_dir = dir_name.replace(expected_uri, output_uri, 1)
        for leaf_file in leaf_files:
            # Both paths below are already complete. They must not be joined
            # onto the base URIs a second time: os.path.join only discards the
            # first component when the second one is an absolute POSIX path,
            # so relative paths or scheme-prefixed URIs would end up with a
            # duplicated prefix.
            expected_file_name = os.path.join(dir_name, leaf_file)
            file_name = os.path.join(output_dir, leaf_file)

            anomalies = anomalies_pb2.Anomalies()
            anomalies.ParseFromString(io_utils.read_bytes_file(file_name))

            expected_anomalies = anomalies_pb2.Anomalies()
            expected_anomalies.ParseFromString(
                io_utils.read_bytes_file(expected_file_name))

            if expected_anomalies.anomaly_info != anomalies.anomaly_info:
                return False
    return True
Пример #2
0
def compare_model_file_sizes(output_uri: Text, expected_uri: Text,
                             threshold: float) -> bool:
    """Compares pipeline output files sizes in output and recorded uri.

    Walks `expected_uri` and, for every directory that is not skipped, checks
    that each recorded sub-directory and file also exists at the mirrored
    location under `output_uri`, and that each pair of file sizes is accepted
    by `_compare_relative_difference` for the given threshold.

    Directories whose path contains 'Format-TFMA', 'eval_model_dir' or
    'export' are not compared, and neither are files whose name starts with
    'events.out.tfevents'.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.
      threshold: a float between 0 and 1.

    Returns:
      True if every compared entry exists in the output tree and every size
      comparison passes, False otherwise.
    """
    skipped_dir_markers = ('Format-TFMA', 'eval_model_dir', 'export')

    for dir_name, sub_dirs, leaf_files in fileio.walk(expected_uri):
        if any(marker in dir_name for marker in skipped_dir_markers):
            continue

        # Location in the output tree that mirrors the recorded directory.
        output_dir = dir_name.replace(expected_uri, output_uri, 1)

        for sub_dir in sub_dirs:
            if not fileio.exists(os.path.join(output_dir, sub_dir)):
                return False

        for leaf_file in leaf_files:
            if leaf_file.startswith('events.out.tfevents'):
                continue
            expected_file_name = os.path.join(dir_name, leaf_file)
            file_name = os.path.join(output_dir, leaf_file)

            # A missing output file is a mismatch, reported the same way as a
            # missing sub-directory, rather than an error raised by size().
            if not fileio.exists(file_name):
                return False

            # Close each handle as soon as its size has been read so that
            # walking a large tree does not accumulate open files.
            with fileio.open(file_name) as output_file:
                output_size = output_file.size()
            with fileio.open(expected_file_name) as expected_file:
                expected_size = expected_file.size()

            if not _compare_relative_difference(output_size, expected_size,
                                                threshold):
                return False
    return True
Пример #3
0
def compare_file_sizes(output_uri: Text, expected_uri: Text,
                       threshold: float) -> bool:
    """Compares pipeline output files sizes in output and recorded uri.

    Walks `expected_uri` and checks that each recorded sub-directory and file
    also exists at the mirrored location under `output_uri`, and that each
    pair of file sizes is accepted by `_compare_relative_difference` for the
    given threshold.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.
      threshold: a float between 0 and 1.

    Returns:
      True if every recorded entry exists in the output tree and every size
      comparison passes, False otherwise.
    """
    for dir_name, sub_dirs, leaf_files in fileio.walk(expected_uri):
        # Location in the output tree that mirrors the recorded directory.
        output_dir = dir_name.replace(expected_uri, output_uri, 1)

        for sub_dir in sub_dirs:
            if not fileio.exists(os.path.join(output_dir, sub_dir)):
                return False

        for leaf_file in leaf_files:
            expected_file_name = os.path.join(dir_name, leaf_file)
            file_name = os.path.join(output_dir, leaf_file)

            # A missing output file is a mismatch, reported the same way as a
            # missing sub-directory, rather than an error raised by size().
            if not fileio.exists(file_name):
                return False

            # Close each handle as soon as its size has been read so that
            # walking a large tree does not accumulate open files.
            with fileio.open(file_name) as output_file:
                output_size = output_file.size()
            with fileio.open(expected_file_name) as expected_file:
                expected_size = expected_file.size()

            if not _compare_relative_difference(output_size, expected_size,
                                                threshold):
                return False
    return True
Пример #4
0
def verify_file_dir(output_uri: Text,
                    expected_uri: Text,
                    check_file: bool = False):
    """Checks that the output tree contains everything the recorded tree has.

    The recorded tree under `expected_uri` is walked and each sub-directory
    (and, when requested, each file) is looked up at the mirrored location
    under `output_uri`. The first missing entry is logged as an error.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.
      check_file: when True, files are checked in addition to directories.

    Returns:
      True if every checked entry exists under `output_uri`, False otherwise.
    """
    for recorded_dir, child_dirs, child_files in fileio.walk(expected_uri):
        mirrored_dir = recorded_dir.replace(expected_uri, output_uri, 1)

        # Directories are always verified; files only on request. Directories
        # come first so the order of existence checks stays dirs-then-files.
        required_entries = list(child_dirs)
        if check_file:
            required_entries.extend(child_files)

        for entry in required_entries:
            candidate = os.path.join(mirrored_dir, entry)
            if fileio.exists(candidate):
                continue
            logging.error('%s doesn\'t exists.', candidate)
            return False
    return True
Пример #5
0
def copy_dir(
    src: str,
    dst: str,
    allow_regex_patterns: Iterable[str] = (),
    deny_regex_patterns: Iterable[str] = (),
) -> None:
  """Recursively copies a directory tree, optionally filtering by regex.

  Any existing `dst` is removed and recreated before copying starts. Trailing
  '/' characters are stripped from both `src` and `dst`.

  Args:
    src: Source directory to copy from. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
    dst: Destination directory to copy to. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
    allow_regex_patterns: Optional allowlist of regular expressions, searched
        against the full source path. When non-empty, files and
        subdirectories matching none of the patterns are not copied.
    deny_regex_patterns: Optional denylist of regular expressions, searched
        against the full source path. Files and subdirectories matching any
        of the patterns are not copied.
  """
  src = src.rstrip('/')
  dst = dst.rstrip('/')

  allowed = [re.compile(pattern) for pattern in allow_regex_patterns]
  denied = [re.compile(pattern) for pattern in deny_regex_patterns]

  def is_selected(path):
    # An empty allowlist allows everything; a non-empty one must match.
    if allowed and not any(rx.search(path) for rx in allowed):
      return False
    return not any(rx.search(path) for rx in denied)

  if fileio.exists(dst):
    fileio.rmtree(dst)
  fileio.makedirs(dst)

  for current_dir, child_dirs, child_files in fileio.walk(src):
    target_dir = current_dir.replace(src, dst, 1)
    target_ready = fileio.isdir(target_dir)

    for file_name in child_files:
      source_file = os.path.join(current_dir, file_name)
      if not is_selected(source_file):
        continue
      if not target_ready:
        # The parent directory may have been filtered out by the patterns
        # even though this file was selected, so create it on demand.
        fileio.makedirs(target_dir)
        target_ready = True
      fileio.copy(source_file, os.path.join(target_dir, file_name))

    for dir_name in child_dirs:
      if is_selected(os.path.join(current_dir, dir_name)):
        fileio.makedirs(os.path.join(target_dir, dir_name))
 def _PrintTaskLogsOnError(self, task):
     """Logs, at error level, every log file recorded for the given task.

     Files are looked up under `<airflow_home>/logs/<dag_id>.<task>`. For each
     file found, its path is logged first and then each of its lines.

     Args:
       task: task name used to build the log directory name.
     """
     logs_root = os.path.join(self._airflow_home, 'logs')
     task_logs = os.path.join(logs_root, '%s.%s' % (self._dag_id, task))
     for parent_dir, _, file_names in fileio.walk(task_logs):
         for file_name in file_names:
             log_path = os.path.join(parent_dir, file_name)
             absl.logging.error('Print task log %s:', log_path)
             with fileio.open(log_path, 'r') as log_file:
                 for log_line in log_file.readlines():
                     absl.logging.error(log_line)
Пример #7
0
def copy_dir(src: Text, dst: Text) -> None:
  """Recursively copies the directory tree at `src` to `dst`.

  Any existing `dst` is removed and recreated before copying starts. Trailing
  '/' characters are stripped from both paths.

  Args:
    src: source directory to copy from.
    dst: destination directory to copy to.
  """
  src = src.rstrip('/')
  dst = dst.rstrip('/')

  if fileio.exists(dst):
    fileio.rmtree(dst)
  fileio.makedirs(dst)

  for current_dir, child_dirs, child_files in fileio.walk(src):
    # Location in the destination tree that mirrors the current directory.
    target_dir = current_dir.replace(src, dst, 1)

    for file_name in child_files:
      fileio.copy(
          os.path.join(current_dir, file_name),
          os.path.join(target_dir, file_name))

    for dir_name in child_dirs:
      fileio.makedirs(os.path.join(target_dir, dir_name))