def move(src, dest, user=None):
    """
    Move or rename ``src`` to ``dest``.
    """
    src_host, src_port, src_path = path.split(src, user)
    dest_host, dest_port, dest_path = path.split(dest, user)
    src_fs = hdfs(src_host, src_port, user)
    dest_fs = hdfs(dest_host, dest_port, user)
    try:
        retval = src_fs.move(src_path, dest_fs, dest_path)
        return retval
    finally:
        src_fs.close()
        dest_fs.close()
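
# Hedged usage sketch for ``move`` (the paths and namenode addresses below are
# illustrative assumptions, not part of this module):
#
#   # rename a file within the default HDFS instance
#   move("/user/alice/raw.txt", "/user/alice/archive/raw.txt")
#   # move between explicitly addressed filesystems
#   move("hdfs://nn1:8020/data/a.txt", "hdfs://nn2:8020/data/a.txt")
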
def abspath(hdfs_path, user=None, local=False):
    """
    Return an absolute path for ``hdfs_path``.

    The ``user`` arg is passed to :func:`split`. The ``local`` argument
    forces ``hdfs_path`` to be interpreted as an ordinary local path:

    .. code-block:: python

      >>> import os
      >>> os.chdir('/tmp')
      >>> import pydoop.hdfs.path as hpath
      >>> hpath.abspath('file:/tmp')
      'file:/tmp'
      >>> hpath.abspath('file:/tmp', local=True)
      'file:/tmp/file:/tmp'
    """
    if local:
        return 'file:%s' % os.path.abspath(hdfs_path)
    if _HdfsPathSplitter.PATTERN.match(hdfs_path):
        return hdfs_path
    hostname, port, path = split(hdfs_path, user=user)
    if hostname:
        fs = hdfs_fs.hdfs(hostname, port)
        apath = join("hdfs://%s:%s" % (fs.host, fs.port), path)
        fs.close()
    else:
        apath = "file:%s" % os.path.abspath(path)
    return apath
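
# Hedged sketch of how ``abspath`` resolves different inputs (the resolved
# hostname, port and home directory are assumptions that depend on your
# configuration):
#
#   abspath("part-00000")          # e.g. "hdfs://<namenode>:<port>/user/<user>/part-00000"
#   abspath("file:/tmp")           # already matches the split pattern, returned as is
#   abspath("/tmp/x", local=True)  # "file:/tmp/x", treated as an ordinary local path
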
def mkdir(hdfs_path, user=None):
    """
    Create a directory and its parents as needed.
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    retval = fs.create_directory(path_)
    fs.close()
    return retval
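
# Example (assumed path): intermediate directories are created as needed,
# similar in spirit to ``hadoop fs -mkdir -p``:
#
#   mkdir("/user/alice/output/2014/01")
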
def exists(hdfs_path, user=None):
    """
    Return ``True`` if ``hdfs_path`` exists in the default HDFS, else ``False``.
    """
    hostname, port, path = split(hdfs_path, user=user)
    fs = hdfs_fs.hdfs(hostname, port)
    retval = fs.exists(path)
    fs.close()
    return retval
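
# Example (assumed path):
#
#   if not exists("/user/alice/input"):
#       raise RuntimeError("missing input directory")
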
def rmr(hdfs_path, user=None):
    """
    Recursively remove files and directories.
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    retval = fs.delete(path_)
    fs.close()
    return retval
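
# Example (assumed path): remove a whole directory tree, analogous to
# ``hadoop fs -rmr``:
#
#   rmr("/user/alice/scratch")
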
def cp(src_hdfs_path, dest_hdfs_path, **kwargs):
    """
    Copy the contents of ``src_hdfs_path`` to ``dest_hdfs_path``.

    Additional keyword arguments, if any, are handled like in :func:`open`.
    If ``src_hdfs_path`` is a directory, its contents will be copied
    recursively.
    """
    src, dest = {}, {}
    try:
        for d, p in ((src, src_hdfs_path), (dest, dest_hdfs_path)):
            d["host"], d["port"], d["path"] = path.split(p)
            d["fs"] = hdfs(d["host"], d["port"])
        # --- does src exist? ---
        try:
            src["info"] = src["fs"].get_path_info(src["path"])
        except IOError:
            raise IOError("no such file or directory: %r" % (src["path"]))
        # --- src exists.  Does dest exist? ---
        try:
            dest["info"] = dest["fs"].get_path_info(dest["path"])
        except IOError:
            if src["info"]["kind"] == "file":
                _cp_file(src["fs"], src["path"], dest["fs"], dest["path"],
                         **kwargs)
                return
            else:
                dest["fs"].create_directory(dest["path"])
                dest_hdfs_path = dest["fs"].get_path_info(dest["path"])["name"]
                for item in src["fs"].list_directory(src["path"]):
                    cp(item["name"], dest_hdfs_path, **kwargs)
                return
        # --- dest exists.  Is it a file? ---
        if dest["info"]["kind"] == "file":
            raise IOError("%r already exists" % (dest["path"]))
        # --- dest is a directory ---
        dest["path"] = path.join(dest["path"], path.basename(src["path"]))
        if dest["fs"].exists(dest["path"]):
            raise IOError("%r already exists" % (dest["path"]))
        if src["info"]["kind"] == "file":
            _cp_file(src["fs"], src["path"], dest["fs"], dest["path"],
                     **kwargs)
        else:
            dest["fs"].create_directory(dest["path"])
            dest_hdfs_path = dest["fs"].get_path_info(dest["path"])["name"]
            for item in src["fs"].list_directory(src["path"]):
                cp(item["name"], dest_hdfs_path, **kwargs)
    finally:
        for d in src, dest:
            try:
                d["fs"].close()
            except KeyError:
                pass
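
# Hedged usage sketch for ``cp`` (paths are illustrative assumptions).
# Copying a file into an existing directory places it under that directory
# with the same basename; copying a directory whose destination does not yet
# exist creates the destination and recurses into the source's contents:
#
#   cp("/user/alice/data.txt", "/user/alice/backup")       # -> /user/alice/backup/data.txt
#   cp("/user/alice/dataset", "/user/alice/dataset_copy")  # recursive directory copy
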
def open(hdfs_path, mode="r", buff_size=0, replication=0, blocksize=0,
         readline_chunk_size=common.BUFSIZE, user=None):
    """
    Open a file, returning an :class:`hdfs_file` object.

    ``hdfs_path`` and ``user`` are passed to :func:`~path.split`, while the
    other args are passed to the :class:`hdfs_file` constructor.
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    return fs.open_file(path_, mode, buff_size, replication, blocksize,
                        readline_chunk_size)
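
# Hedged usage sketch for ``open`` (the path is an illustrative assumption).
# The returned :class:`hdfs_file` is used here like an ordinary file object
# and is closed explicitly by the caller:
#
#   f = open("/user/alice/part-00000", "r")
#   try:
#       data = f.read()
#   finally:
#       f.close()
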
def kind(path, user=None):
    """
    Get the kind of item ("file" or "directory") that ``path`` references.

    Return :obj:`None` if ``path`` doesn't exist.
    """
    hostname, port, path = split(path, user=user)
    fs = hdfs_fs.hdfs(hostname, port)
    try:
        return fs.get_path_info(path)['kind']
    except IOError:
        return None
    finally:
        fs.close()
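
# Example (assumed path): dispatch on the item type without catching IOError
# yourself:
#
#   k = kind("/user/alice/input")
#   if k == "directory":
#       pass  # handle directory
#   elif k == "file":
#       pass  # handle file
#   else:
#       pass  # k is None: path does not exist
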
def chmod(hdfs_path, mode, user=None):
    """
    Change file mode bits.

    :type hdfs_path: string
    :param hdfs_path: the path to the file or directory
    :type mode: int
    :param mode: the bitmask to set it to (e.g., 0777)
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    retval = fs.chmod(path_, mode)
    fs.close()
    return retval
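
# Example (assumed path; note the octal literal for the mode bits):
#
#   chmod("/user/alice/script.sh", 0755)
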
def lsl(hdfs_path, user=None, recursive=False):
    """
    Return a list of dictionaries of file properties.

    If ``hdfs_path`` is a file, there is only one item corresponding to the
    file itself; if it is a directory and ``recursive`` is :obj:`False`,
    each list item corresponds to a file or directory contained by it; if
    it is a directory and ``recursive`` is :obj:`True`, the list contains
    one item for every file or directory in the tree rooted at
    ``hdfs_path``.
    """
    host, port, path_ = path.split(hdfs_path, user)
    fs = hdfs(host, port, user)
    if not recursive:
        dir_list = fs.list_directory(path_)
    else:
        treewalk = fs.walk(path_)
        top = treewalk.next()
        if top['kind'] == 'directory':
            dir_list = list(treewalk)
        else:
            dir_list = [top]
    fs.close()
    return dir_list
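
# Hedged usage sketch for ``lsl`` (the path is an assumption; the "name" and
# "kind" keys are the path-info properties used elsewhere in this module):
#
#   for info in lsl("/user/alice/output", recursive=True):
#       print info["name"], info["kind"]
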