def compare_anomalies(output_uri: Text, expected_uri: Text) -> bool:
    """Compares anomalies files in output uri and recorded uri.

    Walks every file under `expected_uri`, parses it and the file at the same
    relative location under `output_uri` as serialized `Anomalies` messages,
    and compares their `anomaly_info` fields. Files that exist only under
    `output_uri` are not examined.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.

    Returns:
      boolean whether anomalies are same.
    """
    for dir_name, _, leaf_files in fileio.walk(expected_uri):
        # Mirror the walked directory into the output tree.
        output_dir = dir_name.replace(expected_uri, output_uri, 1)
        for leaf_file in leaf_files:
            expected_file_name = os.path.join(dir_name, leaf_file)
            file_name = os.path.join(output_dir, leaf_file)

            # Both paths above are already complete. They must not be joined
            # onto their root uri a second time: os.path.join only discards
            # the first component for absolute POSIX paths, so relative paths
            # and scheme-prefixed uris would end up with the root duplicated.
            anomalies = anomalies_pb2.Anomalies()
            anomalies.ParseFromString(io_utils.read_bytes_file(file_name))

            expected_anomalies = anomalies_pb2.Anomalies()
            expected_anomalies.ParseFromString(
                io_utils.read_bytes_file(expected_file_name))

            if expected_anomalies.anomaly_info != anomalies.anomaly_info:
                return False
    return True
def compare_model_file_sizes(output_uri: Text, expected_uri: Text,
                             threshold: float) -> bool:
    """Compares model output file sizes in output and recorded uri.

    Walks `expected_uri` and, for each visited directory, checks that every
    sub-directory also exists under `output_uri` and that every file has a
    size within `threshold` of its counterpart (as decided by
    `_compare_relative_difference`).

    Directories whose walked path contains 'Format-TFMA', 'eval_model_dir' or
    'export' are skipped entirely, as are files whose name starts with
    'events.out.tfevents'. The directory filter is a plain substring match on
    the path yielded by the walk.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.
      threshold: a float between 0 and 1.

    Returns:
      boolean whether file sizes differ within a threshold.
    """
    for dir_name, sub_dirs, leaf_files in fileio.walk(expected_uri):
        if ('Format-TFMA' in dir_name or 'eval_model_dir' in dir_name or
                'export' in dir_name):
            continue

        # Mirror the walked directory into the output tree.
        output_dir = dir_name.replace(expected_uri, output_uri, 1)

        for sub_dir in sub_dirs:
            new_file_path = os.path.join(output_dir, sub_dir)
            if not fileio.exists(new_file_path):
                return False

        for leaf_file in leaf_files:
            if leaf_file.startswith('events.out.tfevents'):
                continue
            expected_file_name = os.path.join(dir_name, leaf_file)
            file_name = os.path.join(output_dir, leaf_file)

            # Use context managers so the handles are closed right after the
            # size is read instead of leaking one handle per compared file.
            with fileio.open(file_name) as output_file:
                output_size = output_file.size()
            with fileio.open(expected_file_name) as expected_file:
                expected_size = expected_file.size()

            if not _compare_relative_difference(output_size, expected_size,
                                                threshold):
                return False
    return True
def compare_file_sizes(output_uri: Text, expected_uri: Text,
                       threshold: float) -> bool:
    """Compares pipeline output files sizes in output and recorded uri.

    Walks `expected_uri` and, for each visited directory, checks that every
    sub-directory also exists under `output_uri` and that every file has a
    size within `threshold` of its counterpart (as decided by
    `_compare_relative_difference`). Entries that exist only under
    `output_uri` are not examined.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.
      threshold: a float between 0 and 1.

    Returns:
      boolean whether file sizes differ within a threshold.
    """
    for dir_name, sub_dirs, leaf_files in fileio.walk(expected_uri):
        # Mirror the walked directory into the output tree.
        output_dir = dir_name.replace(expected_uri, output_uri, 1)

        for sub_dir in sub_dirs:
            new_file_path = os.path.join(output_dir, sub_dir)
            if not fileio.exists(new_file_path):
                return False

        for leaf_file in leaf_files:
            expected_file_name = os.path.join(dir_name, leaf_file)
            file_name = os.path.join(output_dir, leaf_file)

            # Use context managers so the handles are closed right after the
            # size is read instead of leaking one handle per compared file.
            with fileio.open(file_name) as output_file:
                output_size = output_file.size()
            with fileio.open(expected_file_name) as expected_file:
                expected_size = expected_file.size()

            if not _compare_relative_difference(output_size, expected_size,
                                                threshold):
                return False
    return True
def verify_file_dir(output_uri: Text,
                    expected_uri: Text,
                    check_file: bool = False):
    """Checks that the output tree mirrors the recorded directory structure.

    Every sub-directory found while walking `expected_uri` must exist at the
    same relative location under `output_uri`. When `check_file` is set, the
    same is required of every file. The first missing path is logged as an
    error and ends the check.

    Args:
      output_uri: pipeline output artifact uri.
      expected_uri: recorded pipeline output artifact uri.
      check_file: boolean indicating whether to check file path.

    Returns:
      a boolean whether file paths are matching.
    """
    for expected_dir, child_dirs, child_files in fileio.walk(expected_uri):
        output_dir = expected_dir.replace(expected_uri, output_uri, 1)
        # Directories are always verified; files only on request, and only
        # after all directories at this level have been checked.
        groups = (child_dirs, child_files) if check_file else (child_dirs,)
        for names in groups:
            for name in names:
                candidate = os.path.join(output_dir, name)
                if not fileio.exists(candidate):
                    logging.error('%s doesn\'t exists.', candidate)
                    return False
    return True
def copy_dir(
    src: str,
    dst: str,
    allow_regex_patterns: Iterable[str] = (),
    deny_regex_patterns: Iterable[str] = (),
) -> None:
    """Recursively copies a directory tree, optionally filtered by regex.

    Any existing `dst` is removed first and then recreated empty.

    Args:
      src: Source directory to copy from. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
      dst: Destination directory to copy to. <src>/a/b.txt will be copied to
        <dst>/a/b.txt.
      allow_regex_patterns: Optional allowlist of regular expressions, searched
        against the full source path. When given, files and subdirectories
        matching none of the patterns are not copied.
      deny_regex_patterns: Optional denylist of regular expressions, searched
        against the full source path. Files and subdirectories matching any of
        the patterns are not copied.
    """
    src = src.rstrip('/')
    dst = dst.rstrip('/')
    allowed = [re.compile(pattern) for pattern in allow_regex_patterns]
    denied = [re.compile(pattern) for pattern in deny_regex_patterns]

    def is_selected(path):
        # An empty allowlist allows everything; an empty denylist denies
        # nothing.
        if allowed and not any(rx.search(path) for rx in allowed):
            return False
        if denied and any(rx.search(path) for rx in denied):
            return False
        return True

    if fileio.exists(dst):
        fileio.rmtree(dst)
    fileio.makedirs(dst)

    for src_dir, child_dirs, child_files in fileio.walk(src):
        dst_dir = src_dir.replace(src, dst, 1)
        dst_dir_ready = fileio.isdir(dst_dir)

        for file_name in child_files:
            src_file = os.path.join(src_dir, file_name)
            if not is_selected(src_file):
                continue
            if not dst_dir_ready:
                # The parent directory may have been filtered out by name even
                # though a file inside it is selected, so create it lazily.
                fileio.makedirs(dst_dir)
                dst_dir_ready = True
            fileio.copy(src_file, os.path.join(dst_dir, file_name))

        for dir_name in child_dirs:
            if is_selected(os.path.join(src_dir, dir_name)):
                fileio.makedirs(os.path.join(dst_dir, dir_name))
def _PrintTaskLogsOnError(self, task):
    """Logs, at error level, the content of every log file of a task.

    The files are looked up recursively under
    `<self._airflow_home>/logs/<self._dag_id>.<task>`. For each file, its path
    is logged first, followed by one log record per line of the file.

    Args:
      task: task name used to build the log directory name.
    """
    log_root = os.path.join(
        self._airflow_home, 'logs', '%s.%s' % (self._dag_id, task))
    for current_dir, _, file_names in fileio.walk(log_root):
        for file_name in file_names:
            log_path = os.path.join(current_dir, file_name)
            absl.logging.error('Print task log %s:', log_path)
            with fileio.open(log_path, 'r') as log_file:
                for log_line in log_file.readlines():
                    absl.logging.error(log_line)
def copy_dir(src: Text, dst: Text) -> None:
    """Recursively copies the directory tree at `src` to `dst`.

    Any existing `dst` is removed first and then recreated empty. Trailing
    slashes on both arguments are ignored.

    Args:
      src: source directory to copy from.
      dst: destination directory to copy to.
    """
    src_root = src.rstrip('/')
    dst_root = dst.rstrip('/')

    if fileio.exists(dst_root):
        fileio.rmtree(dst_root)
    fileio.makedirs(dst_root)

    for src_dir, child_dirs, child_files in fileio.walk(src_root):
        # Mirror the walked directory into the destination tree.
        dst_dir = src_dir.replace(src_root, dst_root, 1)
        for file_name in child_files:
            fileio.copy(
                os.path.join(src_dir, file_name),
                os.path.join(dst_dir, file_name))
        for dir_name in child_dirs:
            fileio.makedirs(os.path.join(dst_dir, dir_name))