Example #1
 def read_data(self, fd, offset, size):
     if offset >= fd.size:
         return ""
     else:
         try:
             contents = self.hdfs.read_file(fd.abspath,
                                            offset=offset,
                                            length=size)
         except pywebhdfs.errors.Unauthorized as e:
             self.logger.info("Unauthorized for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.Unauthorized(
                 "Unauthorized access to the path {0}: {1}".format(
                     fd.abspath, e))
         except requests.exceptions.RequestException as e:
             self.logger.info("ConnectionError for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.BadConnection(
                 "Connection error during HDFS read file: {0}, exc={1}".
                 format(fd.abspath, e))
         except pywebhdfs.errors.PyWebHdfsException as e:
             self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.FsException(
                 "An exception happened during HDFS read file: {0}, exc={1}"
                 .format(fd.abspath, e))
         return contents
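
All of these snippets repeat the same try/except ladder that translates pywebhdfs and requests exceptions into the project's Errors hierarchy. As a rough sketch of how that pattern could be factored out, here is a hypothetical decorator (not part of the project), assuming the same Errors module, self.logger, and pywebhdfs/requests imports the snippets rely on:

    import functools

    import pywebhdfs.errors
    import requests.exceptions

    def translate_hdfs_errors(action):
        # Hypothetical decorator: applies the exception-translation ladder
        # from the snippets above. Assumes the project's Errors module and a
        # self.logger on the wrapped object. `action` is the operation name
        # used in messages (e.g. "read file"); the wrapped method's first
        # argument must be a path string or a descriptor exposing .abspath.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(self, target, *args, **kwargs):
                path = getattr(target, "abspath", target)
                try:
                    return func(self, target, *args, **kwargs)
                except pywebhdfs.errors.Unauthorized as e:
                    self.logger.info("Unauthorized for path {0}: {1}".format(path, e))
                    raise Errors.Unauthorized(
                        "Unauthorized access to the path {0}: {1}".format(path, e))
                except requests.exceptions.RequestException as e:
                    self.logger.info("ConnectionError for path {0}: {1}".format(path, e))
                    raise Errors.BadConnection(
                        "Connection error during HDFS {0}: {1}, exc={2}".format(
                            action, path, e))
                except pywebhdfs.errors.PyWebHdfsException as e:
                    self.logger.info("PyWebHdfsException for path {0}: {1}".format(path, e))
                    raise Errors.FsException(
                        "An exception happened during HDFS {0}: {1}, exc={2}".format(
                            action, path, e))
            return wrapper
        return decorator

With a helper like this, Example #1 would shrink to the read_file call alone under @translate_hdfs_errors("read file").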
Example #2
 def local_mv_file(self, src, dst):
     if self.simulateOnly:
         print("SIMULATE -> local move file: {0} -> {1} ".format(
             src.abspath, dst.abspath))
     else:
         try:
             self.hdfs.rename_file_dir(src.abspath, dst.abspath)
         except pywebhdfs.errors.Unauthorized as e:
             self.logger.info("Unauthorized for path {0}: {1}".format(
                 src.abspath, e))
             raise Errors.Unauthorized(
                 "Unauthorized access to the path {0}: {1}".format(
                     src.abspath, e))
         except requests.exceptions.RequestException as e:
             self.logger.info("ConnectionError for path {0}: {1}".format(
                 src.abspath, e))
             raise Errors.BadConnection(
                 "Connection error during HDFS rename file: {0}, exc={1}".
                 format(src.abspath, e))
         except pywebhdfs.errors.PyWebHdfsException as e:
             self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                 src.abspath, e))
             raise Errors.FsException(
                 "An exception happened during HDFS rename file: {0}, exc={1}"
                 .format(src.abspath, e))
Example #3
 def truncate_file(self, fd, size):
     if self.simulateOnly:
         print("SIMULATE -> truncate file: {0}, size={1}".format(
             fd.abspath, size))
     else:
         try:
             self.hdfs.truncate_file(fd.abspath, size)
         except pywebhdfs.errors.Unauthorized as e:
             self.logger.info("Unauthorized for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.Unauthorized(
                 "Unauthorized access to the path {0}: {1}".format(
                     fd.abspath, e))
         except requests.exceptions.RequestException as e:
             self.logger.info("ConnectionError for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.BadConnection(
                 "Connection error during HDFS truncate file: {0}, exc={1}".
                 format(fd.abspath, e))
         except pywebhdfs.errors.PyWebHdfsException as e:
             self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.FsException(
                 "An exception happened during HDFS truncate file: {0}, exc={1}"
                 .format(fd.abspath, e))
Example #4
    def concat_files(self, fd, chunkFdList):
        strList = list()
        for chunkFd in chunkFdList:
            strList.append(chunkFd.abspath)

        if self.simulateOnly:
            print("SIMULATE -> concat file: {0}, sources={1}".format(
                fd.abspath, ",".join(strList)))
        else:
            try:
                self.hdfs.concat_files(fd.abspath, strList)
            except pywebhdfs.errors.Unauthorized as e:
                self.logger.info("Unauthorized for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.Unauthorized(
                    "Unauthorized access to the path {0}: {1}".format(
                        fd.abspath, e))
            except requests.exceptions.RequestException as e:
                self.logger.info("ConnectionError for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.BadConnection(
                    "Connection error during HDFS concat file: {0}, exc={1}".
                    format(fd.abspath, e))
            except pywebhdfs.errors.PyWebHdfsException as e:
                self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                    fd.abspath, e))
                raise Errors.FsException(
                    "An exception happened during HDFS concat file: {0}, exc={1}"
                    .format(fd.abspath, e))
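
concat_files is the counterpart of the chunked copy shown later in Example #11. A minimal usage sketch, with fs, dst, and dstChunkList as assumed names from that example; note that HDFS historically placed restrictions on CONCAT sources (for instance, every source except the last had to end on a full block), so the chunk size the copier uses matters:

    # Hypothetical usage, mirroring Example #11: merge the per-chunk files
    # back into the destination descriptor's file.
    fs.concat_files(dst, dstChunkList)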
Example #5
 def list_dir(self, fd):
     try:
         status = self.hdfs.list_dir(fd.abspath)
     except pywebhdfs.errors.Unauthorized as e:
         self.logger.info("Unauthorized for path {0}: {1}".format(
             fd.abspath, e))
         raise Errors.Unauthorized(
             "Unauthorized access to the path {0}: {1}".format(
                 fd.abspath, e))
     except requests.exceptions.RequestException as e:
         self.logger.info("ConnectionError for path {0}: {1}".format(
             fd.abspath, e))
         raise Errors.BadConnection(
             "Connection error while looking for path: {0}, exc={1}".format(
                 fd.abspath, e))
     except pywebhdfs.errors.PyWebHdfsException as e:
         self.logger.info("PyWebHdfsException for path {0}: {1}".format(
             fd.abspath, e))
         raise Errors.FsException(
             "An exception happened while looking for path: {0}, exc={1}".
             format(fd.abspath, e))
     currentDir = status["FileStatuses"]["FileStatus"]
     for item in currentDir:
         yield HadoopFileDescriptor(self,
                                    fd.abspath,
                                    isSrc=True,
                                    needsDstDirCheck=False,
                                    fileJson=item)
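
Because list_dir is a generator, the try/except above only runs once iteration actually starts, and callers can stream large directories without materializing the whole listing. A minimal usage sketch (fs and rootFd are assumed names for the filesystem object and a directory descriptor from make_fd):

    # Hypothetical usage: stream a directory listing entry by entry.
    # Assumes the descriptor exposes .type and .abspath, as its local
    # counterpart in Example #7 does.
    for child in fs.list_dir(rootFd):
        print("{0}\t{1}".format(child.type, child.abspath))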
Example #6
 def delete_file_dir(self, fd, recursive=False, force=False):
     if self.simulateOnly:
         print("SIMULATE -> remove file/dir: {0}, recursive={1}".format(
             fd.abspath, recursive))
     else:
         try:
             if not recursive or force or \
                     query_yes_no(question="Are you sure you want to delete folder recursively?", default="no"):
                 status = self.hdfs.delete_file_dir(fd.abspath,
                                                    recursive=recursive)
         except pywebhdfs.errors.Unauthorized as e:
             self.logger.info("Unauthorized for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.Unauthorized(
                 "Unauthorized access to the path {0}: {1}".format(
                     fd.abspath, e))
         except requests.exceptions.RequestException as e:
             self.logger.info("ConnectionError for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.BadConnection(
                 "Connection error during HDFS delete directory: {0}, exc={1}"
                 .format(fd.abspath, e))
         except pywebhdfs.errors.PyWebHdfsException as e:
             self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                 fd.abspath, e))
             raise Errors.FsException(
                 "An exception happened during HDFS delete directory: {0}, exc={1}"
                 .format(fd.abspath, e))
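
query_yes_no is not shown on this page; here is a minimal stand-in that matches how it is called above (an assumption, not the project's actual helper):

    def query_yes_no(question, default="no"):
        # Hypothetical stand-in for the project's confirmation prompt.
        # Returns True for yes and False for no; an empty answer falls back
        # to `default`. Uses Python 3's input().
        valid = {"yes": True, "y": True, "no": False, "n": False}
        while True:
            answer = input("{0} [y/n] ({1}): ".format(question, default))
            answer = answer.strip().lower()
            if not answer:
                return valid[default]
            if answer in valid:
                return valid[answer]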
Example #7
    def __init__(self, localFs, path, isSrc, dstDirMustExist):
        self.fs = localFs
        # Canonicalize path to "/" on descriptor creation
        path = localFs.canonicalize_path(path)

        # Local file system
        if not os.path.exists(path):
            if isSrc:
                raise Errors.FileNotFound("path: %s not found" % path)
            else:
                dstDir, dstName = localFs.get_dir_basename(path)
                if dstDirMustExist and not os.path.exists(dstDir):
                    raise Errors.FileNotFound("destination directory: %s not found" % dstDir)

                self.name = dstName
                self.abspath = path
                self.type = "FILE"

                self.accessTime = datetime.datetime.fromtimestamp(86400)
                self.modificationTime = datetime.datetime.fromtimestamp(86400)
                self.exists = False
                self.replication = "1"
                self.permissions = "777"
                self.owner = "root"
                self.group = "supergroup"
                self.numChildren = "0"
                self.size = 0
                return

        if os.path.islink(path):
            self.type = "SYMLINK"
        elif os.path.isfile(path):
            self.type = "FILE"
        elif os.path.isdir(path):
            self.type = "DIRECTORY"

        self.exists = True
        self.abspath = localFs.canonicalize_path(os.path.abspath(path))
        _, self.name = localFs.get_dir_basename(self.abspath)

        self.replication = "1"
        self.numChildren = "1"

        if os.name == "nt":
            self.size = os.path.getsize(path)
            self.modificationTime = datetime.datetime.fromtimestamp(os.path.getmtime(path))
            self.accessTime = datetime.datetime.fromtimestamp(os.path.getatime(path))
            self.permissions = "777"
            self.owner = "-"
            self.group = "-"
        else:
            statinfo = os.stat(self.abspath)
            self.size = statinfo.st_size
            self.modificationTime = datetime.datetime.fromtimestamp(statinfo.st_mtime)
            self.accessTime = datetime.datetime.fromtimestamp(statinfo.st_atime)
            self.permissions = str(oct(statinfo.st_mode))[-3:]
            self.owner = pwd.getpwuid(statinfo.st_uid).pw_name
            self.group = grp.getgrgid(statinfo.st_gid).gr_name
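
The class name is not visible in this snippet; assuming something like LocalFileDescriptor, with localFs exposing canonicalize_path and get_dir_basename as the code implies, usage would look like this (all names hypothetical):

    # Hypothetical usage: wrap an existing local file as a source descriptor.
    fd = LocalFileDescriptor(localFs, "/tmp/data.bin", isSrc=True,
                             dstDirMustExist=False)
    print("{0} {1} {2} {3}".format(fd.type, fd.size, fd.permissions, fd.owner))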
Example #8
 def exists_file_dir(self, fd):
     try:
         return self.hdfs.exists_file_dir(fd.abspath)
     except pywebhdfs.errors.Unauthorized as e:
         self.logger.info("Unauthorized for path {0}: {1}".format(
             fd.abspath, e))
         raise Errors.Unauthorized(
             "Unauthorized access to the path {0}: {1}".format(
                 fd.abspath, e))
     except requests.exceptions.RequestException as e:
         self.logger.info("ConnectionError for path {0}: {1}".format(
             fd.abspath, e))
         raise Errors.BadConnection(
             "Connection error during HDFS exists test: {0}, exc={1}".
             format(fd.abspath, e))
     except pywebhdfs.errors.PyWebHdfsException as e:
         self.logger.info("PyWebHdfsException for path {0}: {1}".format(
             fd.abspath, e))
         raise Errors.FsException(
             "An exception happened during HDFS exists test: {0}, exc={1}".
             format(fd.abspath, e))
Example #9
 def make_dir(self, path):
     if self.simulateOnly:
         print("SIMULATE -> make dir: " + path)
     else:
         try:
             self.hdfs.make_dir(path)
         except pywebhdfs.errors.Unauthorized as e:
             self.logger.info("Unauthorized for path {0}: {1}".format(
                 path, e))
             raise Errors.Unauthorized(
                 "Unauthorized access to the path {0}: {1}".format(path, e))
         except requests.exceptions.RequestException as e:
             self.logger.info("ConnectionError for path {0}: {1}".format(
                 path, e))
             raise Errors.BadConnection(
                 "Connection error during HDFS create directory: {0}, exc={1}"
                 .format(path, e))
         except pywebhdfs.errors.PyWebHdfsException as e:
             self.logger.info("PyWebHdfsException for path {0}: {1}".format(
                 path, e))
             raise Errors.FsException(
                 "An exception happened during HDFS create directory: {0}, exc={1}"
                 .format(path, e))
Example #10
 def make_fd(self, path, isSrc, dstDirMustExist):
     fd = None
     try:
         fd = HadoopFileDescriptor(self, path, isSrc, dstDirMustExist)
     except pywebhdfs.errors.FileNotFound:
         self.logger.info("DESC: does not exist: " + path)
         raise Errors.FileNotFound("Path {0} does not exist".format(path))
     except pywebhdfs.errors.Unauthorized as e:
         self.logger.info("Unauthorized for path {0}: {1}".format(path, e))
         raise Errors.Unauthorized(
             "Unauthorized access to the path {0}: {1}".format(path, e))
     except requests.exceptions.RequestException as e:
         self.logger.info("ConnectionError for path {0}: {1}".format(
             path, e))
         raise Errors.BadConnection(
             "Connection error while looking for path: {0}, exc={1}".format(
                 path, e))
     except pywebhdfs.errors.PyWebHdfsException as e:
         self.logger.info("PyWebHdfsException for path {0}: {1}".format(
             path, e))
         raise Errors.FsException(
             "An exception happened while looking for path: {0}, exc={1}".
             format(path, e))
     return fd
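
make_fd is the entry point callers hit before any other operation. A sketch of the typical pattern, combining it with read_data from Example #1 (fs is an assumed name for the filesystem object):

    # Hypothetical usage: resolve a path, then read the whole file.
    fd = fs.make_fd("/user/alice/data.csv", isSrc=True, dstDirMustExist=False)
    contents = fs.read_data(fd, offset=0, size=fd.size)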
Example #11
File: BaseFs.py Project: dc-sunk/cocosun
    def remote_cp_file(self, src, dst, dstFs):
        dstChunkList = None

        # Step 1: Perform a copy
        progressString = "- Progress: "
        self.logger.info("REMOTE COPY ({0}): {1} -> {2}".format(
            src.size, src.abspath, dst.abspath))
        if src.size <= Constants.DEFAULT_BIG_FILE_THRESHOLD:
            if self.verbose and is_normal_stdout():
                sys.stdout.write(progressString)
                sys.stdout.flush()
            if dst.exists:
                dstFs.delete_file_dir(dst)
            dstFs.touch_file(dst)

            self.cp_chunk(src, dst, dstFs, 0, 0, True, "wb")
        else:
            chunk = 0
            offset = 0
            chunkSize = Constants.DEFAULT_BIG_FILE_THRESHOLD
            numChunks = (src.size // chunkSize) + 1  # floor division keeps numChunks an int on Python 3
            dstChunkList = list()
            while offset < src.size:
                dstChunk = dstFs.make_fd_retriable(dst.abspath + ".__chunk__" +
                                                   str(chunk),
                                                   isSrc=False,
                                                   dstDirMustExist=True)

                dstChunkList.append(dstChunk)
                self.logger.info("BIG COPY: chunk={0}, dst={1}".format(
                    chunk, dstChunk.abspath))

                if dstChunk.exists:
                    dstFs.delete_file_dir(dstChunk)
                dstFs.touch_file(dstChunk)

                if dstChunk.size == Constants.DEFAULT_BIG_FILE_THRESHOLD \
                        and src.modificationTime <= dstChunk.modificationTime:
                    if self.verbose:
                        print("%s -> %s: skipped" %
                              (src.abspath, dstChunk.abspath))
                elif dstChunk.size > Constants.DEFAULT_BIG_FILE_THRESHOLD:
                    errMsg = "a chunk: {0} has its size bigger than max size, you need remove it before next retry".format(
                        dstChunk.abspath)
                    self.logger.error(errMsg)
                    raise Errors.FsException(errMsg)
                else:
                    if self.verbose:
                        print("%s -> %s" % (src.abspath, dstChunk.abspath))
                        if is_normal_stdout():
                            progressFormatString = "Chunk ({0}/{1}) - "
                            progressString += progressFormatString.format(
                                chunk + 1, numChunks)
                            sys.stdout.write(progressString)
                            sys.stdout.flush()
                    self.cp_chunk(src, dstChunk, dstFs, offset + dstChunk.size,
                                  dstChunk.size, chunk == numChunks - 1, "ab")
                    if self.verbose and is_normal_stdout():
                        sys.stdout.write("\r")
                        sys.stdout.flush()
                chunk += 1
                offset = chunk * chunkSize

        # Step 2: concat all chunk files into the final file
        self.concat_chunk_files(dstFs, dst, dstChunkList)
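
The chunk bookkeeping is the subtle part of this method: numChunks drives both the progress display and the isLast flag passed to cp_chunk. A self-contained sketch of the arithmetic (num_chunks is a hypothetical helper, not project code):

    # Note that (size // chunkSize) + 1, as used above, over-counts by one
    # when size is an exact multiple of chunkSize; true ceiling division
    # avoids that:
    def num_chunks(size, chunkSize):
        return (size + chunkSize - 1) // chunkSize  # ceil(size / chunkSize)

    assert num_chunks(10, 4) == 3  # chunks at offsets 0, 4, 8
    assert num_chunks(8, 4) == 2   # exact multiple: exactly two chunks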