示例#1
0
    def delete_file(self, path):
        """Delete the file or directory at ``path``.

        Parameters
        ----------
        path : string
            API path (``/`` separated, relative to ``self.root_dir``).

        Raises
        ------
        web.HTTPError
            404 if neither a directory nor a file exists at the path,
            403 if the HDFS delete call fails.
        """
        path = path.strip('/')
        hdfs_path = to_os_path(path, self.root_dir)

        # Probe the path type once and reuse the answer for the delete call.
        is_dir = bool(self._hdfs_dir_exists(hdfs_path))
        if not is_dir and not self._hdfs_file_exists(hdfs_path):
            raise web.HTTPError(404, u'File does not exist: %s' % hdfs_path)

        # NOTE: the "refuse to delete non-empty directories" check is
        # disabled in this implementation, so directories are removed
        # recursively together with whatever they contain.
        if is_dir:
            self.log.debug("Removing directory %s", hdfs_path)
        else:
            self.log.debug("Removing file %s", hdfs_path)

        try:
            self.hdfs.delete(hdfs_path, recursive=is_dir)
        except Exception as e:
            # Every failure is reported to the client as 403; keep the real
            # cause in the server log so it is not lost.
            self.log.error(u'Error while deleting %s: %s', hdfs_path, e,
                           exc_info=True)
            raise web.HTTPError(403, u'Permission denied: %s' % path)
示例#2
0
    def _base_model(self, path):
        """Assemble the fields shared by every hdfscontents model.

        The returned dict carries ``name``, ``path``, ``last_modified``,
        ``created``, ``content``, ``format``, ``mimetype`` and ``writable``;
        the three content-related fields are left as ``None``.
        """
        hdfs_path = to_os_path(path, self.root_dir)
        info = self.hdfs.get_path_info(hdfs_path)

        modified_at = tz.utcfromtimestamp(info.get(u'last_mod'))
        # TODO: no creation time is available, so the last-access time is
        # stored in its place.
        accessed_at = tz.utcfromtimestamp(info.get(u'last_access'))

        model = {
            'name': path.rsplit('/', 1)[-1],
            'path': path,
            'last_modified': modified_at,
            'created': accessed_at,
            'content': None,
            'format': None,
            'mimetype': None,
        }

        # TODO: only the 0o200 (owner write) permission bit is inspected;
        # user and group membership are not taken into account yet.
        try:
            write_bit = info.get(u'permissions') & 0o0200
            model['writable'] = write_bit > 0
        except OSError:
            self.log.error("Failed to check write permissions on %s",
                           hdfs_path)
            model['writable'] = False
        return model
示例#3
0
    def _dir_model(self, path, content=True):
        """Build the model for a directory.

        When ``content`` is true the model's ``content`` is a list holding
        the (content-less) model of every listed, non-hidden entry and the
        ``format`` is ``'json'``.

        Raises ``web.HTTPError`` 404 if the directory is missing or hidden.
        """
        hdfs_path = to_os_path(path, self.root_dir)
        not_found = u'directory does not exist: %r' % path

        if not self.dir_exists(path):
            raise web.HTTPError(404, not_found)
        if self.is_hidden(path):
            self.log.info(
                "Refusing to serve hidden directory %r, via 404 Error",
                hdfs_path)
            raise web.HTTPError(404, not_found)

        model = self._base_model(path)
        model['type'] = 'directory'
        if not content:
            return model

        entries = []
        model['content'] = entries
        for child in self._hdfs_ls(hdfs_path):
            # Only the last path component is used as the entry name.
            name = child.strip('/').rsplit('/', 1)[-1]
            if not self.should_list(name) or self._hdfs_is_hidden(child):
                continue
            child_api_path = '%s/%s' % (path, name)
            entries.append(self.get(path=child_api_path, content=False))

        model['format'] = 'json'
        return model
示例#4
0
    def _file_model(self, path, content=True, format=None):
        """Build the model for a file.

        If ``content`` is requested the file is read through
        ``self._read_file`` and its data and format are stored in the model.

        format:
          Passed straight to ``self._read_file``; the format it returns
          (``'text'`` or ``'base64'``) picks the fallback mimetype when the
          file name alone gives no hint.
        """
        model = self._base_model(path)
        model['type'] = 'file'

        hdfs_path = to_os_path(path, self.root_dir)
        model['mimetype'] = mimetypes.guess_type(hdfs_path)[0]

        if not content:
            return model

        data, actual_format = self._read_file(hdfs_path, format)
        if model['mimetype'] is None:
            fallback_mimetypes = {
                'text': 'text/plain',
                'base64': 'application/octet-stream'
            }
            model['mimetype'] = fallback_mimetypes[actual_format]

        model['content'] = data
        model['format'] = actual_format
        return model
    def _get_git_notebooks(self, base_arg):
        """Return ``(base_nb, remote_nb)`` for the notebook at ``base_arg``.

        The pair comes from the first entry yielded by
        ``changed_notebooks('HEAD', None, ...)``. If nothing is yielded the
        file is read once and used for both sides.

        Raises ``HTTPError`` 422 when the path is not inside a valid git
        repository or a ref cannot be resolved.
        """
        # Sometimes the root dir of the files is not cwd
        root_dir = getattr(self.contents_manager, 'root_dir', None)
        # Resolve the API argument to a real file system path
        abs_path = os.path.realpath(to_os_path(base_arg, root_dir))

        # git wants a repository root plus a path relative to it
        try:
            git_root = find_repo_root(abs_path)
        except InvalidGitRepositoryError as e:
            self.log.exception(e)
            raise HTTPError(422, 'Invalid notebook: %s' % abs_path)
        rel_path = os.path.relpath(abs_path, git_root)

        try:
            notebooks = None
            changes = changed_notebooks('HEAD', None, rel_path, git_root)
            for fbase, fremote in changes:
                notebooks = (read_notebook(fbase, on_null='minimal'),
                             read_notebook(fremote, on_null='minimal'))
                break  # there should only ever be one set of files
            if notebooks is None:
                # The filename was either invalid or the file is unchanged.
                # Assume unchanged, and let read_notebook handle error
                # reporting if invalid.
                unchanged = self.read_notebook(
                    os.path.join(git_root, rel_path))
                notebooks = (unchanged, unchanged)
        except (InvalidGitRepositoryError, BadName) as e:
            self.log.exception(e)
            raise HTTPError(422, 'Invalid notebook: %s' % base_arg)
        return notebooks
示例#6
0
    def save(self, model, path=''):
        """Save a file, notebook or directory model to ``path``.

        ``self.run_pre_save_hook(model=model, path=path)`` is called before
        any data is written. The return value is the model obtained from
        ``self.get(path, content=False)``, plus the notebook validation
        message when there is one.

        Raises ``web.HTTPError``: 400 for a missing type, missing content or
        unhandled type; 500 for any unexpected error while writing.
        """
        path = path.strip('/')

        if 'type' not in model:
            raise web.HTTPError(400, u'No file type provided')
        if model['type'] != 'directory' and 'content' not in model:
            raise web.HTTPError(400, u'No file content provided')

        hdfs_path = to_os_path(path, self.root_dir)
        self.log.debug("Saving %s", hdfs_path)

        self.run_pre_save_hook(model=model, path=path)

        try:
            kind = model['type']
            if kind == 'notebook':
                notebook = nbformat.from_dict(model['content'])
                self.check_and_sign(notebook, path)
                self._save_notebook(hdfs_path, notebook)
                # One checkpoint should always exist for notebooks.
                if not self.checkpoints.list_checkpoints(path):
                    self.create_checkpoint(path)
            elif kind == 'file':
                # Missing format will be handled internally by _save_file.
                self._save_file(hdfs_path, model['content'],
                                model.get('format'))
            elif kind == 'directory':
                self._save_directory(hdfs_path, model, path)
            else:
                raise web.HTTPError(
                    400, "Unhandled hdfscontents type: %s" % kind)
        except web.HTTPError:
            # Already a proper HTTP response; pass it through untouched.
            raise
        except Exception as e:
            self.log.error(u'Error while saving file: %s %s',
                           path,
                           e,
                           exc_info=True)
            raise web.HTTPError(
                500, u'Unexpected error while saving file: %s %s' % (path, e))

        message = None
        if model['type'] == 'notebook':
            self.validate_notebook_model(model)
            message = model.get('message', None)

        saved = self.get(path, content=False)
        if message:
            saved['message'] = message

        # The post-save hook is currently disabled:
        #self.run_post_save_hook(model=model, os_path=hdfs_path)

        return saved
示例#7
0
 def cwd_for_path(self, path):
     """Turn API path into an absolute OS path that is an existing directory.

     Starting from the OS path for ``path``, walk up parent directories
     until one exists or ``self.root_dir`` is reached, and return it.
     """
     os_path = to_os_path(path, self.root_dir)
     # in the case of notebooks and kernels not being on the same filesystem,
     # walk up to root_dir if the paths don't exist
     while not os.path.isdir(os_path) and os_path != self.root_dir:
         parent = os.path.dirname(os_path)
         if parent == os_path:
             # dirname() has hit a fixed point (e.g. '' for a relative
             # path, or a missing drive root) that is neither a directory
             # nor root_dir; stop instead of looping forever.
             break
         os_path = parent
     return os_path
 def cwd_for_path(self, path):
     """Turn API path into an absolute OS path that is an existing directory.

     The OS path for ``path`` is replaced by its parent repeatedly until it
     names an existing directory or equals ``self.root_dir``.
     """
     os_path = to_os_path(path, self.root_dir)
     # in the case of notebooks and kernels not being on the same filesystem,
     # walk up to root_dir if the paths don't exist
     while not os.path.isdir(os_path) and os_path != self.root_dir:
         parent = os.path.dirname(os_path)
         if parent == os_path:
             # No further parent to try: without this guard the loop would
             # spin forever on a non-existent top-level path.
             break
         os_path = parent
     return os_path
示例#9
0
 def start_kernel(self, kernel_id=None, path=None, **kwargs):
     """Start a kernel, exposing the notebook path to it via its environment.

     When ``path`` is truthy, a copy of the ``env`` keyword argument
     (``os.environ`` if none was given) gets a ``virtual_path`` entry and is
     passed on to the parent implementation.

     NOTE(review): written as a generator-based coroutine (``yield`` plus
     ``gen.Return``); presumably decorated with ``gen.coroutine`` outside
     the visible lines -- confirm.
     """
     if path:
         kernel_env = kwargs.pop('env', os.environ).copy()
         kernel_env["virtual_path"] = to_os_path(path, "/")
         kwargs["env"] = kernel_env
     parent = super(PGKernelManager, self)
     started_id = yield parent.start_kernel(kernel_id=kernel_id, path=path, **kwargs)
     # py2-compat
     raise gen.Return(started_id)
示例#10
0
    def rename_file(self, old_path, new_path):
        """Rename (move) a file from ``old_path`` to ``new_path``.

        Both arguments are API paths. Renaming a path onto itself is a
        no-op.

        Raises
        ------
        web.HTTPError
            409 if something already exists at ``new_path``; 500 for an
            unexpected failure of the move. An ``HTTPError`` raised by the
            move itself is passed through unchanged.
        """
        old_path = old_path.strip('/')
        new_path = new_path.strip('/')
        if new_path == old_path:
            return

        new_hdfs_path = to_os_path(new_path, self.root_dir)
        old_hdfs_path = to_os_path(old_path, self.root_dir)

        # Should we proceed with the move?
        if self._hdfs_exists(new_hdfs_path):
            raise web.HTTPError(409, u'File already exists: %s' % new_path)

        # Move the file
        try:
            self._hdfs_move_file(old_hdfs_path, new_hdfs_path)
        except web.HTTPError:
            # Keep the status chosen further down rather than masking it
            # as a generic 500.
            raise
        except Exception as e:
            raise web.HTTPError(500, u'Unknown error renaming file: %s %s' % (old_path, e))
示例#11
0
 def cwd_for_path(self, path):
     """Turn API path into an absolute OS path that is an existing directory.

     Walks up from the OS path for ``path`` until an existing directory or
     ``self.root_dir`` is found. On iOS devices, falls back to
     ``~/Documents`` when that directory is not writable and searchable.
     """
     os_path = to_os_path(path, self.root_dir)
     # in the case of notebooks and kernels not being on the same filesystem,
     # walk up to root_dir if the paths don't exist
     while not os.path.isdir(os_path) and os_path != self.root_dir:
         parent = os.path.dirname(os_path)
         if parent == os_path:
             # dirname() cannot go any higher; bail out rather than loop
             # forever on a path that will never become a directory.
             break
         os_path = parent
     # iOS: make sure we can access the directory. Otherwise, start from ~/Documents:
     import sys
     # os.uname() is only evaluated after the platform check has passed.
     if (sys.platform == "darwin" and os.uname().machine.startswith("iP")):
         if not os.access(os_path, os.X_OK | os.W_OK):
             os_path = os.path.join(os.path.expanduser('~'), 'Documents')
     return os_path
示例#12
0
 def _notebook_model(self, path, content=True):
     """Build the model for a notebook.

     If ``content`` is requested, the notebook is read (as version 4), its
     trusted cells are marked, and it is stored in the model as a JSON
     structure (not double-serialized) before the model is validated.
     """
     model = self._base_model(path)
     model['type'] = 'notebook'
     if not content:
         return model

     notebook = self._read_notebook(to_os_path(path, self.root_dir),
                                    as_version=4)
     self.mark_trusted_cells(notebook, path)
     model['content'] = notebook
     model['format'] = 'json'
     self.validate_notebook_model(model)
     return model
示例#13
0
 def is_hidden(self, path):
     """Report whether the file or directory at ``path`` is hidden.

     Parameters
     ----------
     path : string
         API path to check (`/` separated, relative to root dir).

     Returns
     -------
     hidden : bool
         The answer given by ``self._hdfs_is_hidden`` for the resolved path.
     """
     hdfs_path = to_os_path(path.strip('/'), self.root_dir)
     return self._hdfs_is_hidden(hdfs_path)
示例#14
0
    def get_git_notebooks(self,
                          file_path_arg,
                          ref_base='HEAD',
                          ref_remote=None):
        """
        Fetch the before and after state of a notebook for the given Git refs.

        If ``changed_notebooks`` yields nothing for the file, the file is
        read once and returned for both sides.

        :param file_path_arg: The path to the file being diffed
        :param ref_base: the Git ref for the "local" or the "previous" state
        :param ref_remote: the Git ref for the "remote" or the "current" state
        :return: (base_nb, remote_nb)
        :raises HTTPError: 422 for an invalid repository or ref, 500 when
            the git executable cannot be found
        """
        # Sometimes the root dir of the files is not cwd
        root_dir = getattr(self.contents_manager, 'root_dir', None)
        # Resolve the API argument to a real file system path
        abs_path = os.path.realpath(to_os_path(file_path_arg, root_dir))

        # git wants a repository root plus a path relative to it
        try:
            git_root = find_repo_root(abs_path)
        except InvalidGitRepositoryError as e:
            self.log.exception(e)
            raise HTTPError(422, 'Invalid notebook: %s' % abs_path)
        rel_path = os.path.relpath(abs_path, git_root)

        try:
            pair = None
            changes = changed_notebooks(ref_base, ref_remote, rel_path,
                                        git_root)
            for fbase, fremote in changes:
                pair = (read_notebook(fbase, on_null='minimal'),
                        read_notebook(fremote, on_null='minimal'))
                break  # there should only ever be one set of files
            if pair is None:
                # The filename was either invalid or the file is unchanged.
                # Assume unchanged, and let read_notebook handle error
                # reporting if invalid.
                unchanged = self.read_notebook(
                    os.path.join(git_root, rel_path))
                pair = (unchanged, unchanged)
        except (InvalidGitRepositoryError, BadName) as e:
            self.log.exception(e)
            raise HTTPError(422, 'Invalid notebook: %s' % file_path_arg)
        except GitCommandNotFound as e:
            self.log.exception(e)
            raise HTTPError(
                500, 'Could not find git executable. '
                'Please ensure git is available to the server process.')

        return pair
示例#15
0
    def dir_exists(self, path):
        """Check for a directory at the given path (like os.path.isdir).

        Parameters
        ----------
        path : string
            The relative API style path to check

        Returns
        -------
        exists : bool
            The answer given by ``self._hdfs_dir_exists`` for the resolved
            path.
        """
        api_path = path.strip('/')
        return self._hdfs_dir_exists(to_os_path(api_path, self.root_dir))
示例#16
0
 def file_exists(self, path=''):
     """Check for a file at the given path (like os.path.isfile).

     Parameters
     ----------
     path : string
         The API path of a file to check for.

     Returns
     -------
     exists : bool
         The answer given by ``self._hdfs_file_exists`` for the resolved
         path.
     """
     resolved = to_os_path(path.strip('/'), self.root_dir)
     return self._hdfs_file_exists(resolved)
示例#17
0
    def exists(self, path):
        """Check for a file or directory at the given path (like os.path.exists).

        Parameters
        ----------
        path : string
            The API path of a file or directory to check for.

        Returns
        -------
        exists : bool
            The answer given by ``self._hdfs_exists`` for the resolved path.
        """
        trimmed = path.strip('/')
        target = to_os_path(trimmed, self.root_dir)
        return self._hdfs_exists(target)
示例#18
0
    def _get_os_path(self, path):
        """Given an API path, return its file system path.

        Parameters
        ----------
        path : string
            The relative API path to the named file.

        Returns
        -------
        path : string
            Native, absolute OS path to for a file.

        Raises
        ------
        404: if path is outside root
        """
        root = os.path.abspath(self.root_dir)
        os_path = to_os_path(path, root)
        # Compare against the root *with* a trailing separator so that a
        # sibling directory sharing the same prefix (root "/srv/nb" vs.
        # "/srv/nb-private", reachable through "..") is rejected too.
        root_prefix = root.rstrip(os.path.sep) + os.path.sep
        if not (os.path.abspath(os_path) + os.path.sep).startswith(root_prefix):
            raise HTTPError(404, "%s is outside root contents directory" % path)
        return os_path
示例#19
0
    def _get_os_path(self, path):
        """Given an API path, return its file system path.

        Parameters
        ----------
        path : string
            The relative API path to the named file.

        Returns
        -------
        path : string
            Native, absolute OS path to for a file.

        Raises
        ------
        404: if path is outside root
        """
        root = os.path.abspath(self.root_dir)
        os_path = to_os_path(path, root)
        # A bare startswith(root) also matches "<root>-something"; anchoring
        # both sides on a trailing separator makes the test compare whole
        # path components.
        candidate = os.path.abspath(os_path) + os.path.sep
        root_prefix = root.rstrip(os.path.sep) + os.path.sep
        if not candidate.startswith(root_prefix):
            raise HTTPError(404, "%s is outside root contents directory" % path)
        return os_path
示例#20
0
    def _get_git_notebooks(self, base_arg):
        """Return ``(base_nb, remote_nb)`` for the notebook at ``base_arg``.

        The pair is read from the first entry yielded by
        ``changed_notebooks('HEAD', None, ...)``; when nothing is yielded
        the file is read once and returned for both sides.

        Raises ``HTTPError``: 422 for an invalid repository or ref, 500
        when the git executable cannot be found.
        """
        # Sometimes the root dir of the files is not cwd
        root_dir = getattr(self.contents_manager, 'root_dir', None)
        # Resolve the API argument to a real file system path
        abs_path = os.path.realpath(to_os_path(base_arg, root_dir))

        # git wants a repository root plus a path relative to it
        try:
            git_root = find_repo_root(abs_path)
        except InvalidGitRepositoryError as e:
            self.log.exception(e)
            raise HTTPError(422, 'Invalid notebook: %s' % abs_path)
        rel_path = os.path.relpath(abs_path, git_root)

        try:
            changes = changed_notebooks('HEAD', None, rel_path, git_root)
            for old_file, new_file in changes:
                base_nb = read_notebook(old_file, on_null='minimal')
                remote_nb = read_notebook(new_file, on_null='minimal')
                break  # there should only ever be one set of files
            else:
                # The filename was either invalid or the file is unchanged.
                # Assume unchanged, and let read_notebook handle error
                # reporting if invalid.
                base_nb = remote_nb = self.read_notebook(
                    os.path.join(git_root, rel_path))
        except (InvalidGitRepositoryError, BadName) as e:
            self.log.exception(e)
            raise HTTPError(422, 'Invalid notebook: %s' % base_arg)
        except GitCommandNotFound as e:
            self.log.exception(e)
            raise HTTPError(
                500, 'Could not find git executable. '
                     'Please ensure git is available to the server process.')

        return base_nb, remote_nb
示例#21
0
 def to_os_path(self, api_path):
     """Resolve ``api_path`` against ``self.notebook_dir``.

     Delegates to the module-level ``to_os_path`` helper.
     """
     root_dir = self.notebook_dir
     return to_os_path(api_path, root=root_dir)
示例#22
0
    def save(self, model, path=''):
        """Save a file, notebook or directory model to ``path``.

        ``self.run_pre_save_hook(model=model, path=path)`` is called before
        any data is written. File models may arrive in chunks: a model
        without ``chunk`` or with ``chunk == 1`` overwrites the file, any
        other chunk value is appended.

        Returns the model obtained from ``self.get(path, content=False)``,
        plus the notebook validation message when there is one.

        Raises an HTTP error: 400 for a missing type, missing content or
        unhandled type; 500 for a text save onto a compressed file or any
        unexpected error while writing.
        """
        path = path.strip('/')

        if 'type' not in model:
            raise web.HTTPError(400, u'No file type provided')
        if 'content' not in model and model['type'] != 'directory':
            raise web.HTTPError(400, u'No file content provided')

        hdfs_path = to_os_path(path, self.root_dir)

        # Content may be absent or None (e.g. directory models); never let
        # the log line itself abort the save.
        try:
            size = len(model.get('content'))
        except TypeError:
            size = 'None'
        # %s throughout: size/chunk are not guaranteed to be integers, and
        # %d with a non-number makes the logging call fail.
        self.log.info("Saving %s size=%s type=%s format=%s chunk=%s writable=%s",
                      hdfs_path,
                      size,
                      model['type'],
                      model.get('format', 'None'),
                      model.get('chunk', 0),
                      str(model['writable']) if 'writable' in model else 'None')

        self.run_pre_save_hook(model=model, path=path)

        try:
            if model['type'] == 'notebook':
                nb = nbformat.from_dict(model['content'])
                self.check_and_sign(nb, path)
                self._save_notebook(hdfs_path, nb)
                # One checkpoint should always exist for notebooks.
                if not self.checkpoints.list_checkpoints(path):
                    self.create_checkpoint(path)
            elif model['type'] == 'file':
                # Missing format will be handled internally by _save_file,
                # so it must not raise KeyError here.
                file_format = model.get('format')
                if self.is_compressed_file(hdfs_path) and file_format == 'text':
                    raise HTTPError(500, u'Detected compressed file format - text is not editable')
                # large file are saved in chunks
                # model['chunk'] is 1 for the first chunk
                # chunks numbered greater than 1 are appended to the file
                append = 'chunk' in model and model['chunk'] != 1
                self._save_file(hdfs_path, model['content'], file_format, append)
            elif model['type'] == 'directory':
                self._save_directory(hdfs_path, model, path)
            else:
                raise HTTPError(400, "Unhandled hdfscontents type: %s" % model['type'])
        except HTTPError:
            raise
        except Exception as e:
            self.log.error(u'Error while saving file: %s %s', path, e, exc_info=True)
            raise HTTPError(500, u'Unexpected error while saving file: %s %s' % (path, e))

        validation_message = None
        if model['type'] == 'notebook':
            self.validate_notebook_model(model)
            validation_message = model.get('message', None)

        model = self.get(path, content=False)
        if validation_message:
            model['message'] = validation_message

        # The post-save hook is currently disabled:
        # self.run_post_save_hook(model=model, os_path=hdfs_path)

        return model
示例#23
0
 def to_os_path(self, api_path):
     """Resolve ``api_path`` against ``self.td.name``.

     Delegates to the module-level ``to_os_path`` helper.
     """
     root_dir = self.td.name
     return to_os_path(api_path, root=root_dir)
示例#24
0
 def to_os_path(self, api_path):
     """Map an API path to an OS path rooted at ``self.notebook_dir``.

     Thin wrapper around the module-level ``to_os_path`` helper.
     """
     base_dir = self.notebook_dir
     os_path = to_os_path(api_path, root=base_dir)
     return os_path
示例#25
0
    def _get_hdfs_path(self, path):
        """Resolve the API ``path`` against ``self.root_dir``.

        Delegates to the ``to_os_path`` helper.
        """
        hdfs_path = to_os_path(path, self.root_dir)
        return hdfs_path