Example #1
  def copyfile(self, src, dst, skip_header=False):
    sb = self._stats(src)
    if sb is None:
      raise IOError(errno.ENOENT, _("Copy src '%s' does not exist") % src)
    if sb.isDir:
      raise IOError(errno.EINVAL, _("Copy src '%s' is a directory") % src)
    if self.isdir(dst):
      raise IOError(errno.EINVAL, _("Copy dst '%s' is a directory") % dst)

    offset = 0

    while True:
      data = self.read(src, offset, UPLOAD_CHUNK_SIZE.get())
      cnt = len(data)

      if offset == 0:
        if skip_header:
          # find() returns -1 instead of raising when the first chunk has no newline
          n = data.find('\n')
          if n >= 0:
            data = data[n + 1:]
        self.create(dst,
                    overwrite=True,
                    blocksize=sb.blockSize,
                    replication=sb.replication,
                    permission=oct(stat.S_IMODE(sb.mode)),
                    data=data)
      else:
        self.append(dst, data)

      if cnt < UPLOAD_CHUNK_SIZE.get():
        break

      offset += cnt
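
For reference, a minimal sketch of what the skip_header branch does to the first chunk; the literal data and names here are made up for illustration:

first_chunk = "col_a,col_b\n1,2\n3,4\n"
n = first_chunk.find('\n')
if n >= 0:
  first_chunk = first_chunk[n + 1:]
assert first_chunk == "1,2\n3,4\n"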
Example #2
  def copyfile(self, src, dst):
    sb = self._stats(src)
    if sb is None:
      raise IOError(errno.ENOENT, "Copy src '%s' does not exist" % (src,))
    if sb.isDir:
      raise IOError(errno.EINVAL, "Copy src '%s' is a directory" % (src,))
    if self.isdir(dst):
      raise IOError(errno.EINVAL, "Copy dst '%s' is a directory" % (dst,))

    offset = 0

    while True:
      data = self.read(src, offset, UPLOAD_CHUNK_SIZE.get())
      if offset == 0:
        self.create(dst,
                    overwrite=True,
                    blocksize=sb.blockSize,
                    replication=sb.replication,
                    permission=oct(stat.S_IMODE(sb.mode)),
                    data=data)

      cnt = len(data)
      if cnt == 0:
        break

      if offset != 0:
        self.append(dst, data)
      offset += cnt
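
Note the structural difference from Example #1: this variant stops only on an empty read (cnt == 0) rather than on a short read, so it issues one extra read after the final chunk, but it stays correct even if read() can return fewer bytes than requested before end of file.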
Example #3
 def __init__(self, request):
   FileUploadHandler.__init__(self, request)
   self._file = None
   self._starttime = 0
   self._activated = False
   # Need to directly modify FileUploadHandler.chunk_size
   FileUploadHandler.chunk_size = UPLOAD_CHUNK_SIZE.get()
Example #4
 def read_in_chunks(fs, path, offset=0):
   while True:
     chunk = fs.read(path, offset, UPLOAD_CHUNK_SIZE.get())
     if chunk:
       offset += len(chunk)
       yield chunk
     else:
       return
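
A minimal usage sketch for the generator above, assuming fs exposes the same read(path, offset, length) API as the other examples; the paths are illustrative:

with open('/tmp/local_copy', 'wb') as out:
  for chunk in read_in_chunks(fs, '/user/test/data.csv'):
    out.write(chunk)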
Example #5
  def test_upload_file(self):
    with tempfile.NamedTemporaryFile() as local_file:
      # Make sure we can upload larger than the UPLOAD chunk size
      file_size = UPLOAD_CHUNK_SIZE.get() * 2
      local_file.write('0' * file_size)
      local_file.flush()

      prefix = self.cluster.fs_prefix + '/test_upload_file'
      self.cluster.fs.mkdir(prefix)

      USER_NAME = 'test'
      HDFS_DEST_DIR = prefix + "/tmp/fb-upload-test"
      LOCAL_FILE = local_file.name
      HDFS_FILE = HDFS_DEST_DIR + '/' + os.path.basename(LOCAL_FILE)

      self.cluster.fs.do_as_superuser(self.cluster.fs.mkdir, HDFS_DEST_DIR)
      self.cluster.fs.do_as_superuser(self.cluster.fs.chown, HDFS_DEST_DIR, USER_NAME, USER_NAME)
      self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, HDFS_DEST_DIR, 0700)

      stats = self.cluster.fs.stats(HDFS_DEST_DIR)
      assert_equal(stats['user'], USER_NAME)
      assert_equal(stats['group'], USER_NAME)

      # Upload the temporary file created above
      resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR, # GET param avoids infinite looping
                         dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
      response = json.loads(resp.content)

      assert_equal(0, response['status'], response)
      stats = self.cluster.fs.stats(HDFS_FILE)
      assert_equal(stats['user'], USER_NAME)
      assert_equal(stats['group'], USER_NAME)

      f = self.cluster.fs.open(HDFS_FILE)
      actual = f.read(file_size)
      expected = file(LOCAL_FILE).read()
      assert_equal(actual, expected, 'files do not match: %s != %s' % (len(actual), len(expected)))

      # Upload again; it fails because the file already exists
      resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                         dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
      response = json.loads(resp.content)
      assert_equal(-1, response['status'], response)
      assert_true('already exists' in response['data'], response)

      # Upload to the same (0700) directory as another user and fail because of missing permissions
      not_me = make_logged_in_client("not_me", is_superuser=False)
      grant_access("not_me", "not_me", "filebrowser")
      try:
        resp = not_me.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                           dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
        response = json.loads(resp.content)
        assert_equal(-1, response['status'], response)
        assert_true('Permission denied' in response['data'], response)
      except AttributeError:
        # Seems like a Django bug:
        # StopFutureHandlers() does not appear to work in test mode; handling
        # continues with MemoryFileUploadHandler after the permission error and fails.
        pass
Example #6
  def test_upload_file(self):
    with tempfile.NamedTemporaryFile() as local_file:
      # Make sure we can upload larger than the UPLOAD chunk size
      file_size = UPLOAD_CHUNK_SIZE.get() * 2
      local_file.write('0' * file_size)
      local_file.flush()

      prefix = self.cluster.fs_prefix + '/test_upload_file'
      self.cluster.fs.mkdir(prefix)

      USER_NAME = 'test'
      HDFS_DEST_DIR = prefix + "/tmp/fb-upload-test"
      LOCAL_FILE = local_file.name
      HDFS_FILE = HDFS_DEST_DIR + '/' + os.path.basename(LOCAL_FILE)

      self.cluster.fs.do_as_superuser(self.cluster.fs.mkdir, HDFS_DEST_DIR)
      self.cluster.fs.do_as_superuser(self.cluster.fs.chown, HDFS_DEST_DIR, USER_NAME, USER_NAME)
      self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, HDFS_DEST_DIR, 0700)

      stats = self.cluster.fs.stats(HDFS_DEST_DIR)
      assert_equal(stats['user'], USER_NAME)
      assert_equal(stats['group'], USER_NAME)

      # Upload the temporary file created above
      resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR, # GET param avoids infinite looping
                         dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
      response = json.loads(resp.content)

      assert_equal(0, response['status'], response)
      stats = self.cluster.fs.stats(HDFS_FILE)
      assert_equal(stats['user'], USER_NAME)
      assert_equal(stats['group'], USER_NAME)

      f = self.cluster.fs.open(HDFS_FILE)
      actual = f.read(file_size)
      expected = file(LOCAL_FILE).read()
      assert_equal(actual, expected, 'files do not match: %s != %s' % (len(actual), len(expected)))

      # Upload again; it fails because the file already exists
      resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                         dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
      response = json.loads(resp.content)
      assert_equal(-1, response['status'], response)
      assert_true('already exists' in response['data'], response)

      # Upload to the same (0700) directory as another user and fail because of missing permissions
      not_me = make_logged_in_client("not_me", is_superuser=False)
      grant_access("not_me", "not_me", "filebrowser")
      try:
        resp = not_me.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                           dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
        response = json.loads(resp.content)
        assert_equal(-1, response['status'], response)
        assert_true('User not_me does not have permissions' in response['data'], response)
      except AttributeError:
        # Seems like a Django bug:
        # StopFutureHandlers() does not appear to work in test mode; handling
        # continues with MemoryFileUploadHandler after the permission error and fails.
        pass
Example #7
 def __init__(self, request):
   FileUploadHandler.__init__(self, request)
   self._file = None
   self._starttime = 0
   self._activated = False
   self._destination = request.GET.get('dest', None) # GET param avoids infinite looping
   self.request = request
   # Need to directly modify FileUploadHandler.chunk_size
   FileUploadHandler.chunk_size = UPLOAD_CHUNK_SIZE.get()
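
For context, a hedged sketch of how such a handler is typically installed in a Django view. UploadHandler stands in for the class this __init__ belongs to, and the view names are illustrative; request.upload_handlers itself is standard Django and must be modified before request.POST or request.FILES is first accessed:

def upload_file(request):
  # Must run before the request body is parsed
  request.upload_handlers.insert(0, UploadHandler(request))
  return _process_upload(request)  # hypothetical inner view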
Example #8
 def read_in_chunks(fs, path, offset):
   src_file_obj = fs.open(path, mode='r')
   try:
     gz = gzip.GzipFile(fileobj=src_file_obj, mode='rb')
     # GzipFile has no efficient random seek, so skip `offset` decompressed
     # bytes by reading and discarding them
     gz.read(offset)
     while True:
       chunk = gz.read(UPLOAD_CHUNK_SIZE.get())
       if chunk:
         yield chunk
       else:
         return
   finally:
     # Runs when the generator is exhausted or closed, so the source file
     # is not leaked if the consumer stops early
     src_file_obj.close()
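
Hypothetical usage of this gzip-aware variant, resuming 1024 bytes into the decompressed stream; fs and the path are assumed to match the API used above:

for chunk in read_in_chunks(fs, '/user/test/data.csv.gz', offset=1024):
  out.write(chunk)  # out is any writable file object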
Example #9
    def __init__(self, request):
        FileUploadHandler.__init__(self, request)
        self._file = None
        self._starttime = 0
        self._activated = False
        self._destination = request.GET.get(
            'dest', None)  # GET param avoids infinite looping
        self.request = request
        fs = fsmanager.get_filesystem('default')
        fs.setuser(request.user.username)
        FileUploadHandler.chunk_size = (
            fs.get_upload_chuck_size(self._destination)
            if self._destination
            else UPLOAD_CHUNK_SIZE.get()
        )

        LOG.debug("Chunk size = %d" % FileUploadHandler.chunk_size)
Example #10
 def get_upload_chuck_size(self):
     from hadoop.conf import UPLOAD_CHUNK_SIZE  # circular dependency
     return UPLOAD_CHUNK_SIZE.get()
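
All of the examples above depend on the UPLOAD_CHUNK_SIZE config object from hadoop.conf. A minimal stand-in that makes the snippets runnable outside that codebase, assuming only that the real object exposes .get() returning the chunk size in bytes (the default below is illustrative):

class _ChunkSizeConf(object):
  def __init__(self, default):
    self._default = default

  def get(self):
    return self._default

UPLOAD_CHUNK_SIZE = _ChunkSizeConf(64 * 1024 * 1024)  # 64 MiB, illustrative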