Example #1
    def test_file_in_subfolder(self):
        # Test with and without trailing slash
        ret = file_util.file_is_in_folder_glob("/a/b/c/foo.py", "/a")
        self.assertTrue(ret)
        ret = file_util.file_is_in_folder_glob("/a/b/c/foo.py", "/a/")
        self.assertTrue(ret)
        ret = file_util.file_is_in_folder_glob("/a/b/c/foo.py", "/a/b")
        self.assertTrue(ret)
        ret = file_util.file_is_in_folder_glob("/a/b/c/foo.py", "/a/b/")
        self.assertTrue(ret)
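A minimal sketch of an fnmatch-based implementation that would satisfy the assertions above; this is an assumption for illustration only, and the actual file_util.file_is_in_folder_glob may be written differently:

    import os
    from fnmatch import fnmatch

    def file_is_in_folder_glob(filepath, folderpath_glob):
        # Normalize the glob so it also matches files in subfolders, which is
        # why "/a" and "/a/" behave the same in the tests above.
        if not folderpath_glob.endswith("*"):
            if folderpath_glob.endswith("/"):
                folderpath_glob += "*"
            else:
                folderpath_glob += "/*"
        file_dir = os.path.dirname(filepath) + "/"
        return fnmatch(file_dir, folderpath_glob)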
Example #2
    def _file_should_be_hashed(self, filename):
        filepath = os.path.abspath(filename)
        file_is_blacklisted = self._folder_black_list.is_blacklisted(filepath)
        # Short circuiting for performance.
        if file_is_blacklisted:
            return False
        return file_util.file_is_in_folder_glob(
            filepath, self._get_main_script_directory()
        ) or file_util.file_in_pythonpath(filepath)
Example #3
    def is_blacklisted(self, filepath):
        """Test if filepath is in the blacklist.

        Parameters
        ----------
        filepath : str
            File path that we intend to test.

        """
        return any(
            file_util.file_is_in_folder_glob(filepath, blacklisted_folder)
            for blacklisted_folder in self._folder_blacklist)
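Because each blacklist entry is passed straight to file_util.file_is_in_folder_glob, the entries may themselves be glob patterns. A hedged usage example follows; the folder patterns are hypothetical, and it assumes the constructor simply stores the list it is given (the real class may append extra defaults):

    black_list = FolderBlackList(["**/node_modules", "/home/user/private"])
    black_list.is_blacklisted("/app/node_modules/lib/index.py")  # True
    black_list.is_blacklisted("/app/src/main.py")  # False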
Example #4
    def _file_should_be_hashed(self, filename: str) -> bool:
        global _FOLDER_BLACK_LIST

        if not _FOLDER_BLACK_LIST:
            _FOLDER_BLACK_LIST = FolderBlackList(
                config.get_option("server.folderWatchBlacklist"))

        filepath = os.path.abspath(filename)
        file_is_blacklisted = _FOLDER_BLACK_LIST.is_blacklisted(filepath)
        # Short circuiting for performance.
        if file_is_blacklisted:
            return False
        return file_util.file_is_in_folder_glob(
            filepath, self._get_main_script_directory()
        ) or file_util.file_in_pythonpath(filepath)
Example #5
    def _file_should_be_watched(self, filepath):
        # Using short circuiting for performance.
        return self._file_is_new(filepath) and (
            file_util.file_is_in_folder_glob(filepath,
                                             self._session_data.script_folder)
            or file_util.file_in_pythonpath(filepath))
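file_util.file_in_pythonpath is used here but not shown in these examples. A plausible sketch, assuming it treats every entry of the PYTHONPATH environment variable as a watched folder and reuses file_is_in_folder_glob (the real helper may differ):

    import os

    def file_in_pythonpath(filepath):
        # Check the file against each PYTHONPATH entry with the same glob test.
        pythonpath = os.environ.get("PYTHONPATH", "")
        if not pythonpath:
            return False
        return any(
            file_is_in_folder_glob(os.path.abspath(filepath), os.path.abspath(path))
            for path in pythonpath.split(os.pathsep))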
Example #6
    def _to_bytes(self, obj, context):
        """Hash objects to bytes, including code with dependencies.
        Python's built-in `hash` does not produce consistent results across
        runs."""

        try:
            if _is_magicmock(obj):
                # MagicMock can result in objects that appear to be infinitely
                # deep, so we don't try to hash them at all.
                return self.to_bytes(id(obj))
            elif isinstance(obj, bytes) or isinstance(obj, bytearray):
                return obj
            elif isinstance(obj, string_types):  # noqa: F821
                # Don't allow the user to override string since
                # str == bytes on python 2
                return obj.encode()
            elif type(obj) in self.hash_funcs:
                # Escape hatch for unsupported objects
                return self.to_bytes(self.hash_funcs[type(obj)](obj))
            elif isinstance(obj, float):
                return self.to_bytes(hash(obj))
            elif isinstance(obj, int) and not isinstance(obj, bool):
                # bool is a subclass of int; exclude it so True/False reach
                # their dedicated branches below instead of hashing as ints.
                return _int_to_bytes(obj)
            elif isinstance(obj, list) or isinstance(obj, tuple):
                h = hashlib.new(self.name)
                # add type to distinguish x from [x]
                self._update(h, type(obj).__name__.encode() + b":")
                for e in obj:
                    self._update(h, e, context)
                return h.digest()
            elif obj is None:
                # Special string since hashes change between sessions.
                # We don't use Python's `hash` since hashes are not consistent
                # across runs.
                return b"none:"
            elif obj is True:
                return b"bool:1"
            elif obj is False:
                return b"bool:0"
            elif type_util.is_type(
                    obj, "pandas.core.frame.DataFrame") or type_util.is_type(
                        obj, "pandas.core.series.Series"):
                import pandas as pd

                if len(obj) >= PANDAS_ROWS_LARGE:
                    obj = obj.sample(n=PANDAS_SAMPLE_SIZE, random_state=0)
                try:
                    return pd.util.hash_pandas_object(obj).sum()
                except TypeError:
                    # Use pickle if pandas cannot hash the object for example if
                    # it contains unhashable objects.
                    return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
            elif type_util.is_type(obj, "numpy.ndarray"):
                h = hashlib.new(self.name)
                self._update(h, obj.shape)

                if obj.size >= NP_SIZE_LARGE:
                    import numpy as np

                    state = np.random.RandomState(0)
                    obj = state.choice(obj.flat, size=NP_SAMPLE_SIZE)

                self._update(h, obj.tobytes())
                return h.digest()
            elif inspect.isbuiltin(obj):
                return self.to_bytes(obj.__name__)
            elif hasattr(obj, "name") and (
                    isinstance(obj, io.IOBase) or
                (isinstance(obj.name, string_types)  # noqa: F821
                 and os.path.exists(obj.name))):
                # Hash files as name + last modification date + offset.
                h = hashlib.new(self.name)
                self._update(h, obj.name)
                self._update(h, os.path.getmtime(obj.name))
                self._update(h, obj.tell())
                return h.digest()
            elif inspect.isroutine(obj):
                if hasattr(obj, "__wrapped__"):
                    # Ignore the wrapper of wrapped functions.
                    return self.to_bytes(obj.__wrapped__)

                if obj.__module__.startswith("streamlit"):
                    # Ignore streamlit modules even if they are in the CWD
                    # (e.g. during development).
                    return self.to_bytes("%s.%s" %
                                         (obj.__module__, obj.__name__))

                h = hashlib.new(self.name)
                filepath = os.path.abspath(obj.__code__.co_filename)

                if file_util.file_is_in_folder_glob(
                        filepath, self._get_main_script_directory()
                ) and not self._folder_black_list.is_blacklisted(filepath):
                    context = _get_context(obj)
                    if obj.__defaults__:
                        self._update(h, obj.__defaults__, context)
                    h.update(self._code_to_bytes(obj.__code__, context))
                else:
                    # Don't hash the code of functions that live outside the
                    # main script's directory or in a blacklisted folder.
                    self._update(h, obj.__module__)
                    self._update(h, obj.__name__)
                return h.digest()
            elif inspect.iscode(obj):
                return self._code_to_bytes(obj, context)
            elif inspect.ismodule(obj):
                # TODO: Figure out how to best show this kind of warning to the
                # user. In the meantime, show nothing. This scenario is too common,
                # so the current warning is quite annoying...
                # st.warning(('Streamlit does not support hashing modules. '
                #             'We did not hash `%s`.') % obj.__name__)
                # TODO: Hash more than just the name for internal modules.
                return self.to_bytes(obj.__name__)
            elif inspect.isclass(obj):
                # TODO: Figure out how to best show this kind of warning to the
                # user. In the meantime, show nothing. This scenario is too common,
                # (e.g. in every "except" statement) so the current warning is
                # quite annoying...
                # st.warning(('Streamlit does not support hashing classes. '
                #             'We did not hash `%s`.') % obj.__name__)
                # TODO: Hash more than just the name of classes.
                return self.to_bytes(obj.__name__)
            elif isinstance(obj, functools.partial):
                # The return value of functools.partial is not a plain function:
                # it's a callable object that remembers the original function plus
                # the arguments you bound to it. So here we need to special-case it.
                h = hashlib.new(self.name)
                self._update(h, obj.args)
                self._update(h, obj.func)
                self._update(h, obj.keywords)
                return h.digest()
            else:
                try:
                    # As a last resort, we pickle the object to hash it.
                    return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
                except:
                    st.warning(
                        _hashing_error_message(
                            "Streamlit cannot hash an object of type %s." %
                            type(obj)))
        except:
            st.warning(
                _hashing_error_message(
                    "Streamlit failed to hash an object of type %s." %
                    type(obj)))
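The integer branch above delegates to an _int_to_bytes helper that is not included in this example. One possible implementation, shown as an assumption about its contract (stable bytes for arbitrarily large signed integers) rather than the original helper:

    def _int_to_bytes(i):
        # Tag the value with its type so ints cannot collide with other hashed
        # categories, and use a signed little-endian encoding of minimal width.
        num_bytes = (i.bit_length() + 8) // 8
        return b"int:" + i.to_bytes(num_bytes, "little", signed=True)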
Example #7
    def test_rel_file_in_folder_glob(self):
        ret = file_util.file_is_in_folder_glob("foo.py", "")
        self.assertTrue(ret)
Example #8
    def test_file_not_in_folder_glob(self):
        ret = file_util.file_is_in_folder_glob("/a/b/c/foo.py", "**/f")
        self.assertFalse(ret)
Example #9
    def test_file_in_folder_glob(self):
        ret = file_util.file_is_in_folder_glob("/a/b/c/foo.py", "**/c")
        self.assertTrue(ret)
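Assuming the "/*" normalization sketched under Example #1, these two glob tests reduce to plain fnmatch calls against the file's directory:

    from fnmatch import fnmatch

    fnmatch("/a/b/c/", "**/c/*")  # True: the directory ends in ".../c/"
    fnmatch("/a/b/c/", "**/f/*")  # False: no "/f/" component in the path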
Example #10
    def test_rel_file_not_in_folder(self):
        # Test with and without trailing slash
        ret = file_util.file_is_in_folder_glob("foo.py", "/d/e/f/")
        self.assertFalse(ret)
        ret = file_util.file_is_in_folder_glob("foo.py", "/d/e/f")
        self.assertFalse(ret)
Example #11
    def update_watched_modules(self):
        if self._is_closed:
            return

        local_filepaths = []

        # Clone modules dict here because we may alter the original dict inside
        # the loop.
        modules = dict(sys.modules)

        for name, module in modules.items():
            try:
                spec = getattr(module, "__spec__", None)

                if spec is None:
                    filepath = getattr(module, "__file__", None)
                    if filepath is None:
                        # Some modules have neither a spec nor a file. But we
                        # can ignore those since they're not the user-created
                        # modules we want to watch anyway.
                        continue
                else:
                    filepath = spec.origin

                if filepath is None:
                    # Built-in modules (and other stuff) don't have origins.
                    continue

                filepath = os.path.abspath(filepath)

                if not os.path.isfile(filepath):
                    # There are some modules that have a .origin, but don't
                    # point to real files. For example, there's a module where
                    # .origin is 'built-in'.
                    continue

                if self._folder_black_list.is_blacklisted(filepath):
                    continue

                file_is_new = filepath not in self._watched_modules
                file_is_local = file_util.file_is_in_folder_glob(
                    filepath, self._report.script_folder
                )

                local_filepaths.append(filepath)

                if file_is_local and file_is_new:
                    self._register_watcher(filepath, name)

            except Exception:
                # In case there's a problem introspecting some specific module,
                # let's not stop the entire loop from running.  For example,
                # the __spec__ field in some modules (like IPython) is actually
                # a dynamic property, which can crash if the underlying
                # module's code has a bug (as discovered by one of our users).
                continue

        # Clone dict here because we may alter the original dict inside the
        # loop.
        watched_modules = dict(self._watched_modules)

        # Remove no-longer-depended-on files from self._watched_modules
        # Will this ever happen?
        for filepath in watched_modules:
            if filepath not in local_filepaths:
                self._deregister_watcher(filepath)