Exemplo n.º 1
0
 def cp(self, recursive, overwrite, src, dst, headers=None):
     """
     Copy files between the local filesystem and DBFS.

     Exactly one of ``src`` and ``dst`` must be a DBFS path according to
     ``DbfsPath.is_valid``; the two remaining combinations are reported through
     ``error_and_quit``.

     :param recursive: if true and ``src`` is a directory, the recursive copy helper is
         used; a ``src`` that is not a directory is copied as a single file either way.
     :param overwrite: passed through unchanged to the copy helpers.
     :param src: source path (local or DBFS).
     :param dst: destination path (local or DBFS).
     :param headers: passed through unchanged to every helper and ``get_status`` call.
     """
     src_is_dbfs = DbfsPath.is_valid(src)
     dst_is_dbfs = DbfsPath.is_valid(dst)

     if not src_is_dbfs and dst_is_dbfs:
         # Copy to DBFS in this case
         if not os.path.exists(src):
             error_and_quit('The local file {} does not exist.'.format(src))
         src_is_dir = os.path.isdir(src)
         if recursive and src_is_dir:
             self._copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite, headers=headers)
             return
         if src_is_dir:
             # A directory was given without the recursive flag.
             error_and_quit(
                 ('The local file {} is a directory. You must provide --recursive')
                 .format(src))
         self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
     elif src_is_dbfs and not dst_is_dbfs:
         # Copy from DBFS in this case
         dbfs_path_src = DbfsPath(src)
         # The status lookup is only performed for recursive copies (short-circuit).
         if recursive and self.get_status(dbfs_path_src, headers=headers).is_dir:
             self._copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite, headers=headers)
         else:
             # Single file: copy it once and do NOT fall through to the recursive helper,
             # mirroring the early return of the copy-to-DBFS branch above.
             self._copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite,
                                                headers=headers)
     elif not src_is_dbfs:
         # Neither path is a DBFS path.
         error_and_quit('Both paths provided are from your local filesystem. '
                        'To use this utility, one of the src or dst must be prefixed '
                        'with dbfs:/')
     else:
         # Both paths are DBFS paths.
         error_and_quit('Both paths provided are from the DBFS filesystem. '
                        'To copy between the DBFS filesystem, you currently must copy the '
                        'file from DBFS to your local filesystem and then back.')
Exemplo n.º 2
0
def cp_cli(api_client, recursive, overwrite, src, dst):
    """
    Copy files to and from DBFS.

    Note that this function will fail if the src and dst are both on the local filesystem
    or if they are both DBFS paths.

    For non-recursive copies, if the dst is a directory, the file will be placed inside the
    directory. For example ``dbfs cp dbfs:/apple.txt .`` will create a file at `./apple.txt`.

    For recursive copies, files inside of the src directory will be copied inside the dst directory
    with the same name. If the dst path does not exist, a directory will be created. For example
    ``dbfs cp -r dbfs:/foo foo`` will create a directory foo and place the files ``dbfs:/foo/a`` at
    ``foo/a``. If ``foo/a`` already exists, the file will not be overridden unless the --overwrite
    flag is provided -- however, dbfs cp --recursive will continue to try and copy other files.
    """
    dbfs_api = DbfsApi(api_client)
    src_is_dbfs = DbfsPath.is_valid(src)
    dst_is_dbfs = DbfsPath.is_valid(dst)

    if not src_is_dbfs and dst_is_dbfs:
        # Copy to DBFS in this case
        if not os.path.exists(src):
            error_and_quit('The local file {} does not exist.'.format(src))
        src_is_dir = os.path.isdir(src)
        if recursive and src_is_dir:
            copy_to_dbfs_recursive(dbfs_api, src, DbfsPath(dst), overwrite)
            return
        if src_is_dir:
            # A directory was given without the recursive flag.
            error_and_quit((
                'The local file {} is a directory. You must provide --recursive'
            ).format(src))
        copy_to_dbfs_non_recursive(dbfs_api, src, DbfsPath(dst), overwrite)
    elif src_is_dbfs and not dst_is_dbfs:
        # Copy from DBFS in this case
        dbfs_path_src = DbfsPath(src)
        # The status lookup is only performed for recursive copies (short-circuit).
        if recursive and dbfs_api.get_status(dbfs_path_src).is_dir:
            copy_from_dbfs_recursive(dbfs_api, dbfs_path_src, dst, overwrite)
        else:
            # Single file: copy it once and do NOT fall through to the recursive helper,
            # mirroring the early return of the copy-to-DBFS branch above.
            copy_from_dbfs_non_recursive(dbfs_api, dbfs_path_src, dst,
                                         overwrite)
    elif not src_is_dbfs:
        # Neither path is a DBFS path.
        error_and_quit(
            'Both paths provided are from your local filesystem. '
            'To use this utility, one of the src or dst must be prefixed '
            'with dbfs:/')
    else:
        # Both paths are DBFS paths.
        error_and_quit(
            'Both paths provided are from the DBFS filesystem. '
            'To copy between the DBFS filesystem, you currently must copy the '
            'file from DBFS to your local filesystem and then back.')
Exemplo n.º 3
0
 def cp(self, recursive, overwrite, src, dst, headers=None):
     """
     Copy files between the local filesystem and DBFS, or between two DBFS paths.

     A path counts as a DBFS path when ``DbfsPath.is_valid`` accepts it. A DBFS-to-DBFS
     copy is staged through a local temporary directory. Two local paths are reported
     through ``error_and_quit``.

     :param recursive: if true and ``src`` is a directory, the recursive copy helper is
         used; a ``src`` that is not a directory is copied as a single file either way.
     :param overwrite: passed through unchanged to the copy helpers.
     :param src: source path (local or DBFS).
     :param dst: destination path (local or DBFS).
     :param headers: passed through unchanged to every helper, ``get_status`` call and
         nested ``cp`` call.
     """
     src_is_dbfs = DbfsPath.is_valid(src)
     dst_is_dbfs = DbfsPath.is_valid(dst)

     if not src_is_dbfs and dst_is_dbfs:
         # Copy to DBFS in this case
         if not os.path.exists(src):
             error_and_quit('The local file {} does not exist.'.format(src))
         src_is_dir = os.path.isdir(src)
         if recursive and src_is_dir:
             self._copy_to_dbfs_recursive(src, DbfsPath(dst), overwrite, headers=headers)
             return
         if src_is_dir:
             # A directory was given without the recursive flag.
             error_and_quit((
                 'The local file {} is a directory. You must provide --recursive'
             ).format(src))
         self._copy_to_dbfs_non_recursive(src, DbfsPath(dst), overwrite, headers=headers)
     elif src_is_dbfs and not dst_is_dbfs:
         # Copy from DBFS in this case
         dbfs_path_src = DbfsPath(src)
         # The status lookup is only performed for recursive copies (short-circuit).
         if recursive and self.get_status(dbfs_path_src, headers=headers).is_dir:
             self._copy_from_dbfs_recursive(dbfs_path_src, dst, overwrite, headers=headers)
         else:
             # Single file: copy it once and do NOT fall through to the recursive helper,
             # mirroring the early return of the copy-to-DBFS branch above.
             self._copy_from_dbfs_non_recursive(dbfs_path_src, dst, overwrite,
                                                headers=headers)
     elif not src_is_dbfs:
         # Neither path is a DBFS path.
         error_and_quit(
             'Both paths provided are from your local filesystem. '
             'To use this utility, one of the src or dst must be prefixed '
             'with dbfs:/')
     else:
         # Both paths are DBFS paths: stage the data locally, then upload it again.
         with TempDir() as temp_dir:
             # Always copy to <temp_dir>/temp since this will work no matter if it's a
             # recursive or a non-recursive copy.
             temp_path = temp_dir.path('temp')
             # The download always overwrites the staging path; the caller's
             # ``overwrite`` only applies to the final destination. ``headers`` must be
             # forwarded, otherwise the nested copies would run without them.
             self.cp(recursive, True, src, temp_path, headers=headers)
             self.cp(recursive, overwrite, temp_path, dst, headers=headers)
 def test_is_valid_false(self):
     """``DbfsPath.is_valid`` must reject the paths ``/test`` and ``test``."""
     for rejected_path in ('/test', 'test'):
         assert not DbfsPath.is_valid(rejected_path)
 def test_is_valid_true(self):
     """``DbfsPath.is_valid`` must accept the path ``dbfs:/test``."""
     accepted_path = 'dbfs:/test'
     assert DbfsPath.is_valid(accepted_path)