Пример #1
0
    def _rename_gcs_batch(src_dest_pairs):
        # Prepare batches.
        gcs_batches = []
        gcs_current_batch = []
        for src, dest in src_dest_pairs:
            gcs_current_batch.append((src, dest))
            if len(gcs_current_batch) == gcsio.MAX_BATCH_OPERATION_SIZE:
                gcs_batches.append(gcs_current_batch)
                gcs_current_batch = []
        if gcs_current_batch:
            gcs_batches.append(gcs_current_batch)

        # Execute GCS renames if any and return exceptions.
        exceptions = []
        for batch in gcs_batches:
            copy_statuses = gcsio.GcsIO().copy_batch(batch)
            copy_succeeded = []
            for src, dest, exception in copy_statuses:
                if exception:
                    exceptions.append((src, dest, exception))
                else:
                    copy_succeeded.append((src, dest))
            delete_batch = [src for src, dest in copy_succeeded]
            delete_statuses = gcsio.GcsIO().delete_batch(delete_batch)
            for i, (src, exception) in enumerate(delete_statuses):
                dest = copy_succeeded[i]
                if exception:
                    exceptions.append((src, dest, exception))
        return exceptions
Пример #2
0
    def size_of_files_in_glob(path, file_names=None):
        """Builds a mapping from file name to file size.

        Args:
          path: a file path pattern; the sizes of the files it matches are read.
          file_names: optional list of the file names whose sizes are needed.
            It supports eventually consistent sources, where two expansions of
            the same glob might yield different files.

        Returns:
          A dict mapping each file name to its size.
        """
        if not path.startswith('gs://'):
            names = ChannelFactory.glob(path) if file_names is None else file_names
            return {name: ChannelFactory.size_in_bytes(name) for name in names}

        listed_sizes = gcsio.GcsIO().size_of_files_in_glob(path)
        if file_names is None:
            return listed_sizes

        # The list API in GCS is eventually consistent, so the listing may miss
        # some of the requested files; ask for the size of those directly.
        return {
            name: (listed_sizes[name] if name in listed_sizes
                   else ChannelFactory.size_in_bytes(name))
            for name in file_names
        }
Пример #3
0
 def rm(path):
     """Deletes a single file on GCS or on the local file system.

     Args:
       path: path of the file to delete; a 'gs://' prefix selects GCS.

     Raises:
       IOError: if removing a local file fails with an OSError.
     """
     is_gcs_path = path.startswith('gs://')
     if is_gcs_path:
         gcsio.GcsIO().delete(path)
         return

     try:
         os.remove(path)
     except OSError as os_error:
         # Local failures are surfaced to callers as IOError.
         raise IOError(os_error)
Пример #4
0
    def size_in_bytes(path):
        """Looks up the size of one file, in bytes.

        Args:
          path: a string naming a single file; a 'gs://' prefix selects GCS.

        Returns:
          The size reported by GCS for 'gs://' paths, otherwise the size
          reported by the local file system.
        """
        if not path.startswith('gs://'):
            return os.path.getsize(path)
        return gcsio.GcsIO().size(path)
Пример #5
0
 def rename(src, dest):
     """Renames a file on GCS or on the local file system.

     Args:
       src: path of the existing file; a 'gs://' prefix selects GCS.
       dest: new path of the file. It must be a 'gs://' path when src is one.

     Raises:
       ValueError: if src is a GCS path but dest is not.
       IOError: if a local rename fails with an OSError.
     """
     if src.startswith('gs://'):
         if not dest.startswith('gs://'):
             # Interpolate the path into the message; passing it as a second
             # argument would leave the '%r' placeholder unformatted.
             raise ValueError('Destination %r must be GCS path.' % (dest,))
         gcsio.GcsIO().rename(src, dest)
     else:
         try:
             os.rename(src, dest)
         except OSError as err:
             # Local failures are surfaced to callers as IOError.
             raise IOError(err)
Пример #6
0
 def rmdir(path):
     """Removes a directory and its contents on GCS or the local file system.

     For 'gs://' paths, every entry matched by the glob '<path>/*' is deleted
     one at a time. Local paths are removed with shutil.rmtree.

     Args:
       path: directory to remove; a trailing '/' is optional for GCS paths.

     Raises:
       IOError: if removing a local directory fails with an OSError.
     """
     if not path.startswith('gs://'):
         try:
             shutil.rmtree(path)
         except OSError as os_error:
             # Local failures are surfaced to callers as IOError.
             raise IOError(os_error)
         return

     gcs = gcsio.GcsIO()
     prefix = path if path.endswith('/') else path + '/'
     # TODO: Threadpool?
     for entry in gcs.glob(prefix + '*'):
         gcs.delete(entry)
Пример #7
0
 def copytree(src, dest):
     """Copies a directory tree on GCS or on the local file system.

     For local paths an existing dest directory is removed first, so the
     result contains only what was copied from src.

     Args:
       src: source directory; a 'gs://' prefix selects GCS. GCS paths must end
         with '/'.
       dest: destination directory. It must be a 'gs://' path ending with '/'
         when src is a GCS path.

     Raises:
       ValueError: if src is a GCS path but dest is not.
       IOError: if a local removal or copy fails with an OSError.
     """
     if src.startswith('gs://'):
         if not dest.startswith('gs://'):
             # Interpolate the path into the message; passing it as a second
             # argument would leave the '%r' placeholder unformatted.
             raise ValueError('Destination %r must be GCS path.' % (dest,))
         assert src.endswith('/'), src
         assert dest.endswith('/'), dest
         gcsio.GcsIO().copytree(src, dest)
     else:
         try:
             # shutil.copytree refuses to write into an existing directory, so
             # clear the destination before copying.
             if os.path.exists(dest):
                 shutil.rmtree(dest)
             shutil.copytree(src, dest)
         except OSError as err:
             # Local failures are surfaced to callers as IOError.
             raise IOError(err)
Пример #8
0
    def open(path,
             mode,
             mime_type='application/octet-stream',
             compression_type=CompressionTypes.AUTO):
        """Opens a GCS or local file, wrapping it when it is compressed.

        Args:
          path: path of the file to open; a 'gs://' prefix selects GCS.
          mode: mode string handed to the underlying open call.
          mime_type: MIME type passed, together with the compression type,
            through CompressionTypes.mime_type for GCS files.
          compression_type: compression of the file. CompressionTypes.AUTO
            means the type is detected from the path.

        Returns:
          The raw file object when the resolved compression type is
          CompressionTypes.UNCOMPRESSED, otherwise a _CompressedFile wrapping
          the raw file.

        Raises:
          TypeError: if compression_type is neither AUTO nor a valid
            compression type.
        """
        if compression_type == CompressionTypes.AUTO:
            codec = CompressionTypes.detect_compression_type(path)
        else:
            if not CompressionTypes.is_valid_compression_type(compression_type):
                raise TypeError(
                    'compression_type must be CompressionType object but '
                    'was %s' % type(compression_type))
            codec = compression_type

        if path.startswith('gs://'):
            gcs_open = gcsio.GcsIO().open
            raw_file = gcs_open(
                path,
                mode,
                mime_type=CompressionTypes.mime_type(codec, mime_type))
        else:
            raw_file = open(path, mode)

        if codec == CompressionTypes.UNCOMPRESSED:
            return raw_file
        return _CompressedFile(raw_file, compression_type=codec)
Пример #9
0
 def glob(path, limit=None):
     """Lists the files matching a pattern on GCS or the local file system.

     Args:
       path: file path pattern; a 'gs://' prefix selects GCS.
       limit: optional maximum number of results. For local paths the match
         list is sliced to this length; for GCS paths it is forwarded to
         GcsIO.glob.

     Returns:
       The matching paths.
     """
     if not path.startswith('gs://'):
         # NOTE(review): `glob` here is presumably the module-level glob module
         # rather than this function -- confirm against the file's imports.
         local_matches = glob.glob(path)
         return local_matches[:limit]
     return gcsio.GcsIO().glob(path, limit)
Пример #10
0
 def exists(path):
     """Checks whether a path exists on GCS or on the local file system.

     Args:
       path: path to check; a 'gs://' prefix selects GCS.

     Returns:
       The result of GcsIO.exists for GCS paths, otherwise the result of
       os.path.exists.
     """
     if not path.startswith('gs://'):
         return os.path.exists(path)
     return gcsio.GcsIO().exists(path)