Example #1
    def __init__(
        self,
        filepath: PurePosixPath,
        version: Optional[Version],
        exists_function: Callable[[str], bool] = None,
        glob_function: Callable[[str], List[str]] = None,
    ):
        """Creates a new instance of ``AbstractVersionedDataSet``.

        Args:
            filepath: Filepath in POSIX format to a file.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            exists_function: Function that is used for determining whether
                a path exists in a filesystem.
            glob_function: Function that is used for finding all paths
                in a filesystem, which match a given pattern.
        """
        self._filepath = filepath
        self._version = version
        self._exists_function = exists_function or _local_exists
        self._glob_function = glob_function or iglob
        # 1 entry for load version, 1 for save version
        self._version_cache = Cache(maxsize=2)
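
The two-entry cache above backs one cached method for the load version and one for the save version, so the two must write under different keys. A minimal sketch of that pattern with cachetools' cachedmethod, attrgetter and prefixed hashkeys (the class and method names here are invented; the full kedro version appears in Example #21 below):

from functools import partial
from operator import attrgetter

from cachetools import Cache, cachedmethod
from cachetools.keys import hashkey


class VersionedThing:
    def __init__(self):
        # 1 entry for the load version, 1 for the save version
        self._version_cache = Cache(maxsize=2)

    # Distinct key prefixes keep the two methods from overwriting each other
    @cachedmethod(attrgetter("_version_cache"), key=partial(hashkey, "load"))
    def latest_load_version(self):
        return "2024-01-01T00.00.00.000Z"   # stand-in for a filesystem scan

    @cachedmethod(attrgetter("_version_cache"), key=partial(hashkey, "save"))
    def latest_save_version(self):
        return "2024-01-02T00.00.00.000Z"   # stand-in for generate_timestamp()


thing = VersionedThing()
assert thing.latest_load_version() != thing.latest_save_version()
assert len(thing._version_cache) == 2       # both results fit side by side

Without the distinct "load"/"save" prefixes, both methods would compute the same key and overwrite each other's entry.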
Example #2
 def __init__(self, maxsize, ttl=MAXTTL, timer=time.time, getsizeof=None):
     Cache.__init__(self, maxsize, getsizeof)
     self.__root = root = _Link()
     root.prev = root.next = root
     self.__links = collections.OrderedDict()
     self.__timer = _Timer(timer)
     self.__ttl = ttl
     # CHANGE: .set() is the same as setitem
     self.set = self.__setitem__
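
A hedged sketch of what that .set alias gives you, built on the stock TTLCache instead of the custom class above (whose _Link/_Timer internals are not shown); TTLCacheWithSet is an invented name:

import time

from cachetools import TTLCache


class TTLCacheWithSet(TTLCache):
    def __init__(self, maxsize, ttl, timer=time.monotonic, getsizeof=None):
        super().__init__(maxsize, ttl, timer=timer, getsizeof=getsizeof)
        # Mirrors the example's change: .set() is the same as item assignment
        self.set = self.__setitem__


c = TTLCacheWithSet(maxsize=4, ttl=60)
c.set("answer", 42)        # equivalent to c["answer"] = 42
assert c["answer"] == 42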
Example #3
 def __init__(self,
              maxsize,
              ttl,
              expiry_callback=None,
              timer=default_timer,
              getsizeof=None):
     Cache.__init__(self, maxsize, getsizeof)
     self.__root = root = _Link()
     root.prev = root.next = root
     self.__links = collections.OrderedDict()
     self.__timer = _Timer(timer)
     self.__ttl = ttl
     self.__expiry_callback = expiry_callback
Example #4
def evaluate_agent(agent, game_args, cache_size=144):
    # Try and prevent agents from using more than 1 thread
    torch.set_num_threads(1)

    game = Game(**game_args, view_size=agent.view)

    cache = Cache(cache_size)
    idle_detector = IdleDetector(False)

    while not game.game_over:
        player_view = game.get_player_view()

        view_hashable = player_view.tobytes()

        # Check cache
        if view_hashable in cache:
            keys = cache[view_hashable]
        else:
            keys = agent.evaluate(player_view)
            if cache_size > 0:
                cache[view_hashable] = keys

        game.update(keys)

        if idle_detector.update(game.player.tile):
            game.game_over = True
            game.game_over_type = Game.PLAYER_TIMEOUT

    return (game.player.fitness, game.game_over_type)
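
The loop above is hand-rolled memoisation: the unhashable numpy view is turned into bytes, looked up, and the agent is only called on a miss. A standalone sketch of the same pattern, with evaluate as a hypothetical stand-in for agent.evaluate:

import numpy as np
from cachetools import Cache

cache = Cache(maxsize=144)


def evaluate(view):
    # Stand-in for the expensive agent.evaluate(player_view) call
    return float(view.sum())


def evaluate_cached(view):
    key = view.tobytes()         # ndarrays are unhashable; their bytes are not
    if key in cache:
        return cache[key]
    result = evaluate(view)
    cache[key] = result
    return result


frame = np.zeros((4, 4), dtype=np.uint8)
assert evaluate_cached(frame) == evaluate_cached(frame)   # second call is a cache hit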
Example #5
    def __init__(self):
        # Web3 instance connecting to the node
        self.w3 = Web3(Web3.HTTPProvider(NODE_URL))

        # Cache with 3 categories: fast, medium, slow
        self.priceCache = priceCache = Cache(maxsize=3)

        # Caching parameters
        self.cacheInterval = CACHE_INTERVAL
        self.blocksToCache = 150

        # Construct the block hash cache middleware with an LRU cache of 150 items
        self.block_hash_cache_middleware = construct_simple_cache_middleware(
            cache_class=partial(LRUCache, self.blocksToCache),
            rpc_whitelist='eth_getBlockByHash'
        )
        # Add the caching middleware to the Web3 middleware stack
        self.w3.middleware_stack.add(self.block_hash_cache_middleware)

        # Faucet account initialization with private key
        self.faucetAccount = Account.privateKeyToAccount(ETH_PRIVATE_KEY)

        # Transaction parameters of the faucet
        self.txGas = 314150
        self.txGasPrice = 20000000000
        self.chainId = 3
        self.txData = '53656e742066726f6d20676173466175636574202a2e2a'
Example #6
class CredentialProvider:
    credentials = None
    cache = Cache(maxsize=10)

    CREDENTIAL_PROVIDERS = [
        FromCodeCredentialProvider, FromEnvironmentVariablesCredentialProvider,
        FromSecretsCredentialProvider, FromConfigFileCredentialProvider
    ]

    def __init__(self, account='default', credentials=None):
        self.account = account
        for cp in self.CREDENTIAL_PROVIDERS:
            try:
                self.credentials = cp(account=account,
                                      credentials=credentials)()
                break
            except MissingCredentials:
                continue
        if self.credentials:
            self.credentials = self.Config(**self.credentials)
        else:
            raise MissingCredentials(
                f'Credentials are missing: {", ".join(required_credentials)}')

    class Config:
        def __init__(self, **kwargs):
            self.refresh_token = kwargs.get('refresh_token')
            self.lwa_app_id = kwargs.get('lwa_app_id')
            self.lwa_client_secret = kwargs.get('lwa_client_secret')
            self.aws_access_key = kwargs.get('aws_access_key')
            self.aws_secret_key = kwargs.get('aws_secret_key')
            self.role_arn = kwargs.get('role_arn')
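
Note that cache = Cache(maxsize=10) above is a class attribute, so every CredentialProvider instance shares the same cache object (Example #23 below uses the same kind of class-level cache to stash account_data). A short sketch of that Python behaviour with an invented class:

from cachetools import Cache


class Provider:
    cache = Cache(maxsize=10)   # class attribute: one cache shared by all instances


first, second = Provider(), Provider()
first.cache["account_data"] = '{"token": "abc"}'
assert second.cache["account_data"] == '{"token": "abc"}'   # same underlying object
assert Provider.cache is first.cache is second.cache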
Example #7
    def fast_match(self, cluster_ids: list, tokens):
        match_cluster = None

        max_sim = -1
        max_param_count = -1
        max_cluster = None

        for cluster_id in cluster_ids:
            # Try to retrieve cluster from cache with bypassing eviction
            # algorithm as we are only testing candidates for a match.
            cluster = Cache.get(self.id_to_cluster, cluster_id)
            if cluster is None:
                continue
            cur_sim, param_count = self.get_seq_distance(
                cluster.log_template_tokens, tokens)
            if cur_sim > max_sim or (cur_sim == max_sim
                                     and param_count > max_param_count):
                max_sim = cur_sim
                max_param_count = param_count
                max_cluster = cluster

        if max_sim >= self.sim_th:
            match_cluster = max_cluster

        return match_cluster
Example #8
    def __init__(self,
                 maxsize,
                 client=None,
                 host=None,
                 port=None,
                 password=None,
                 db=None,
                 ttl=15 * 60,
                 clear_on_exit=False,
                 key_prefix='RedisCache'):
        Cache.__init__(self, maxsize, None)
        self.client_ = client
        self.host = host or redis_config.HOST
        self.port = port or redis_config.PORT
        self.password = password or redis_config.PASSWORD
        self.db = db or redis_config.DB

        self.ttl = ttl
        self.key_prefix = key_prefix
        if clear_on_exit:
            atexit.register(self.clear)  # register the bound method; self.clear() would clear immediately
Example #9
def main(img_file_path='images/1.jpeg', final_img_file_path='images/result_images/mosaic.jpeg',
         tile_directory='images/image_set', tile_size=2000, tile_to_user_img_pixel_ratio=100, user_img_len=1000,
         user_img_breadth=1000, cache_size=10):
    """
    :param img_file_path: The image that needs to be made as a mosaic
    :param final_img_file_path: Final mosaic image will be stored at this path
    :param tile_directory: The directory from which images are picked up to make part of mosaic image
    :param tile_size: Size in pixels of each tile in the final mosaic image
    :param tile_to_user_img_pixel_ratio: This will decide the number of sub images inside the mosaic
    :param user_img_len: The image that needs to be made as a mosaic will be resized to this
    :param user_img_breadth: The image that needs to be made as a mosaic will be resized to this
    :param cache_size: Number of images stored in memory. Depending on the RAM of the machine need to control this

    The function creates a mosaic image at final_img_file_path
    The final image will contain the following number of sub images:
        (user_img_len/tile_to_user_img_pixel_ratio) * (user_img_breadth/tile_to_user_img_pixel_ratio)
    tile_size decides the number of pixels each sub image will contain, so this will decide the clarity of each image.
        Higher the number, higher the quality with the downside being increased size
    """
    cache = Cache(cache_size)

    tiles_data = []
    tile_file_names = []
    for root, subFolders, files in os.walk(tile_directory):
        for tile_name in files:
            if tile_name.endswith(".jpeg") or tile_name.endswith(".JPG"):
                tile_file_names.append(os.path.join(root, tile_name))
    user_img = Image.open(img_file_path)
    user_img = user_img.resize((user_img_len, user_img_breadth))
    user_img_data = np.array(user_img)
    column_list = []
    img_index_dict = {}
    for r in range(user_img_breadth):
        row_list = []
        for c in range(user_img_len):
            index = ((r % tile_to_user_img_pixel_ratio), (c % tile_to_user_img_pixel_ratio))
            img_index = (int(r / tile_to_user_img_pixel_ratio), int(c / tile_to_user_img_pixel_ratio))
            if img_index not in img_index_dict:
                img_index_dict[img_index] = random.randrange(len(tile_file_names))
            tile_split_img = get_tile_split_img_dict(cache, img_index_dict[img_index], tile_file_names, tile_size, tile_to_user_img_pixel_ratio)[index]
            tint_img = Image.new('RGB', tile_split_img.size, tuple(user_img_data[r, c]))
            new_data = np.array(Image.blend(tile_split_img, tint_img, 0.8))
            print(r, c, user_img_breadth, user_img_len, tile_size, user_img_data[r, c])
            row_list.append(new_data)
        column_list.append(np.concatenate(row_list, axis=1))
        del row_list
    new_img_data = np.concatenate(column_list, axis=0)
    del column_list
    new_img = Image.fromarray(new_img_data).convert('RGB')
    new_img.save(final_img_file_path)
Example #10
    def __init__(self, *args, **kwargs):
        super(ALGO, self).__init__(*args, **kwargs)

        self.base_url = 'http://al.go.leg.br'

        self.institution, _ = Institution.objects.get_or_create(
            siglum='ALGO', name=u'Assembléia Legislativa do Estado de Goiás'
        )

        self.legislature, _ = Legislature.objects.get_or_create(
            institution=self.institution,
            date_start=datetime(2015, 1, 1),
            date_end=datetime(2018, 12, 31)
        )

        self.list_of_legislators_cache = Cache(1024)
        self.expenses_nature_cached = {}
Example #11
 def __getitem__(self, key):
     self._key_check(key)
     try:
         value = Cache.__getitem__(self, key)
     except KeyError:
         try:
             value = self._second_gen[key]
         except KeyError:
             try:
                 value = self._get_item(key)
             except KeyError as ke3:
                 raise ke3
             else:
                 self.__setitem__(key, value)
         else:
             self.__setitem__(key, value)
             if key in self._second_gen:   # the second gen clean up could be triggered during set in first gen
                 del self._second_gen[key]
     else:
         self._update_order(key)
     return value
Example #13
    def __init__(self,
                 delay=1.5,
                 timeout=15,
                 init_cache=False,
                 executable_path="geckodriver",
                 log_path="geckodriver.log",
                 cache=Cache(24),
                 logger=create_logger("milanuncios"),
                 debug=False,
                 firefox_binary="/usr/bin/firefox",
                 display=False):
        self.main_url = "https://www.milanuncios.com"

        self.timeout = timeout
        self.delay = delay
        self.debug = debug
        self.init_cache = init_cache

        self.logger = logger
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        self.cache = cache

        self._executable_path = executable_path
        self._log_path = log_path
        self._firefox_binary = firefox_binary

        # Attributes defined on __enter__
        self.session = None
        self.firefox_user_processes = None
        self.browser = None
        self.browser_pid = None

        self.display = display

        # Account methods
        self.logged = False
        self._logged_soup = None
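
Worth flagging: the cache=Cache(24) default above is evaluated once, when the function is defined, so all instances created without an explicit cache argument share one Cache object. A short sketch of that standard-Python behaviour with an invented class:

from cachetools import Cache


class Scraper:
    def __init__(self, cache=Cache(24)):   # default built once, at definition time
        self.cache = cache


a, b = Scraper(), Scraper()
a.cache["ads"] = ["ad-1"]
assert b.cache["ads"] == ["ad-1"]          # both fell back to the shared default

c = Scraper(cache=Cache(24))               # pass your own cache to avoid sharing
assert "ads" not in c.cache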
Example #14
    async def exists_cached(
        self, cache: Cache, memory: bool = True, **filters: Any,
    ) -> bool:
        id = filters.pop(self.repository.id_name, None)

        if id is None:
            raise RequiredKeyAttributeError(
                type(self.repository).__name__,
                self.repository.id_name,
                self.repository.key_attrs,
            )

        cache_key = self.cache_key(id, self.cache_key_suffix(**filters))
        entity_exists = cache.get(cache_key)

        if entity_exists is None:
            filters[self.repository.id_name] = id

            if memory:
                entity_exists = await self.exists_circuit(
                    self.repository.query(**filters)
                )
            else:
                entity_exists = await self.exists_fallback_circuit(
                    self.repository.query(memory=False, **filters)
                )

            if not entity_exists:
                cache[cache_key] = CACHE_ALREADY_NOT_FOUND
                return False
            else:
                cache[cache_key] = True

        elif entity_exists is CACHE_ALREADY_NOT_FOUND:
            return False

        return True
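
The coroutine above also caches misses: a sentinel (CACHE_ALREADY_NOT_FOUND) is stored so repeated existence checks for an entity known to be absent skip the repository entirely. A hedged, synchronous sketch of that negative-caching idea; the sentinel and lookup function below are illustrative stand-ins:

from cachetools import TTLCache

_NOT_FOUND = object()                        # stand-in for CACHE_ALREADY_NOT_FOUND
_exists_cache = TTLCache(maxsize=1024, ttl=60)


def exists_cached(entity_id, lookup):
    cached_value = _exists_cache.get(entity_id)
    if cached_value is _NOT_FOUND:
        return False                         # known miss, no repository call
    if cached_value is None:
        found = lookup(entity_id)            # the expensive check
        _exists_cache[entity_id] = True if found else _NOT_FOUND
        return found
    return True


known_ids = {1, 2, 3}
assert exists_cached(2, known_ids.__contains__) is True
assert exists_cached(9, known_ids.__contains__) is False
assert exists_cached(9, lambda _: True) is False     # served from the negative cache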
Example #15
class PartitionedDataSet(AbstractDataSet):
    # pylint: disable=too-many-instance-attributes,protected-access
    """``PartitionedDataSet`` loads and saves partitioned file-like data using the
    underlying dataset definition. For filesystem level operations it uses `fsspec`:
    https://github.com/intake/filesystem_spec.

    Example:
    ::

        >>> import pandas as pd
        >>> from kedro.io import PartitionedDataSet
        >>>
        >>> # these credentials will be passed to both 'fsspec.filesystem()' call
        >>> # and the dataset initializer
        >>> credentials = {"key1": "secret1", "key2": "secret2"}
        >>>
        >>> data_set = PartitionedDataSet(
        >>>     path="s3://bucket-name/path/to/folder",
        >>>     dataset="CSVDataSet",
        >>>     credentials=credentials
        >>> )
        >>> loaded = data_set.load()
        >>> # assert isinstance(loaded, dict)
        >>>
        >>> combine_all = pd.DataFrame()
        >>>
        >>> for partition_id, partition_load_func in loaded.items():
        >>>     partition_data = partition_load_func()
        >>>     combine_all = pd.concat(
        >>>         [combine_all, partition_data], ignore_index=True, sort=True
        >>>     )
        >>>
        >>> new_data = pd.DataFrame({"new": [1, 2]})
        >>> # creates "s3://bucket-name/path/to/folder/new/partition.csv"
        >>> data_set.save({"new/partition.csv": new_data})
        >>>
    """
    def __init__(  # pylint: disable=too-many-arguments
        self,
        path: str,
        dataset: Union[str, Type[AbstractDataSet], Dict[str, Any]],
        filepath_arg: str = "filepath",
        filename_suffix: str = "",
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        fs_args: Dict[str, Any] = None,
    ):
        """Creates a new instance of ``PartitionedDataSet``.

        Args:
            path: Path to the folder containing partitioned data.
                If path starts with the protocol (e.g., ``s3://``) then the
                corresponding ``fsspec`` concrete filesystem implementation will
                be used. If protocol is not specified,
                ``fsspec.implementations.local.LocalFileSystem`` will be used.
                **Note:** Some concrete implementations are bundled with ``fsspec``,
                while others (like ``s3`` or ``gcs``) must be installed separately
                prior to usage of the ``PartitionedDataSet``.
            dataset: Underlying dataset definition. This is used to instantiate
                the dataset for each file located inside the ``path``.
                Accepted formats are:
                a) object of a class that inherits from ``AbstractDataSet``
                b) a string representing a fully qualified class name to such class
                c) a dictionary with ``type`` key pointing to a string from b),
                other keys are passed to the Dataset initializer.
                Credentials for the dataset can be explicitly specified in
                this configuration.
            filepath_arg: Underlying dataset initializer argument that will
                contain a path to each corresponding partition file.
                If unspecified, defaults to "filepath".
            filename_suffix: If specified, only partitions that end with this
                string will be processed.
            credentials: Protocol-specific options that will be passed to
                ``fsspec.filesystem``
                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem
                and the dataset initializer. If the dataset config contains
                explicit credentials spec, then such spec will take precedence.
                All possible credentials management scenarios are documented here:
                https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials
            load_args: Keyword arguments to be passed into ``find()`` method of
                the filesystem implementation.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``)

        Raises:
            DataSetError: If versioning is enabled for the underlying dataset.
        """
        # pylint: disable=import-outside-toplevel
        from fsspec.utils import infer_storage_options  # for performance reasons

        super().__init__()

        self._path = path
        self._filename_suffix = filename_suffix
        self._protocol = infer_storage_options(self._path)["protocol"]
        self._partition_cache = Cache(maxsize=1)

        dataset = dataset if isinstance(dataset, dict) else {"type": dataset}
        self._dataset_type, self._dataset_config = parse_dataset_definition(
            dataset)
        if VERSION_KEY in self._dataset_config:
            raise DataSetError(
                "`{}` does not support versioning of the underlying dataset. "
                "Please remove `{}` flag from the dataset definition.".format(
                    self.__class__.__name__, VERSIONED_FLAG_KEY))

        if credentials:
            if CREDENTIALS_KEY in self._dataset_config:
                self._logger.warning(
                    KEY_PROPAGATION_WARNING,
                    {
                        "keys": CREDENTIALS_KEY,
                        "target": "underlying dataset"
                    },
                )
            else:
                self._dataset_config[CREDENTIALS_KEY] = deepcopy(credentials)

        self._credentials = deepcopy(credentials) or {}

        self._fs_args = deepcopy(fs_args) or {}
        if self._fs_args:
            if "fs_args" in self._dataset_config:
                self._logger.warning(
                    KEY_PROPAGATION_WARNING,
                    {
                        "keys": "filesystem arguments",
                        "target": "underlying dataset"
                    },
                )
            else:
                self._dataset_config["fs_args"] = deepcopy(self._fs_args)

        self._filepath_arg = filepath_arg
        if self._filepath_arg in self._dataset_config:
            warn(
                "`{}` key must not be specified in the dataset definition as it "
                "will be overwritten by partition path".format(
                    self._filepath_arg))

        self._load_args = deepcopy(load_args) or {}
        self._sep = self._filesystem.sep
        # since some filesystem implementations may implement a global cache
        self._invalidate_caches()

    @property
    def _filesystem(self):
        # for performance reasons
        import fsspec  # pylint: disable=import-outside-toplevel

        protocol = "s3" if self._protocol in S3_PROTOCOLS else self._protocol
        return fsspec.filesystem(protocol, **self._credentials,
                                 **self._fs_args)

    @property
    def _normalized_path(self) -> str:
        if self._protocol in S3_PROTOCOLS:
            return urlparse(self._path)._replace(scheme="s3").geturl()
        return self._path

    @cachedmethod(cache=operator.attrgetter("_partition_cache"))
    def _list_partitions(self) -> List[str]:
        return [
            path for path in self._filesystem.find(self._normalized_path, **
                                                   self._load_args)
            if path.endswith(self._filename_suffix)
        ]

    def _join_protocol(self, path: str) -> str:
        if self._path.startswith(
                self._protocol) and not path.startswith(self._protocol):
            return f"{self._protocol}://{path}"
        return path

    def _partition_to_path(self, path: str):
        dir_path = self._path.rstrip(self._sep)
        path = path.lstrip(self._sep)
        full_path = self._sep.join([dir_path, path]) + self._filename_suffix
        return full_path

    def _path_to_partition(self, path: str) -> str:
        dir_path = self._filesystem._strip_protocol(self._normalized_path)
        path = path.split(dir_path, 1).pop().lstrip(self._sep)
        if self._filename_suffix and path.endswith(self._filename_suffix):
            path = path[:-len(self._filename_suffix)]
        return path

    def _load(self) -> Dict[str, Callable[[], Any]]:
        partitions = {}

        for partition in self._list_partitions():
            kwargs = deepcopy(self._dataset_config)
            # join the protocol back since PySpark may rely on it
            kwargs[self._filepath_arg] = self._join_protocol(partition)
            dataset = self._dataset_type(**kwargs)  # type: ignore
            partition_id = self._path_to_partition(partition)
            partitions[partition_id] = dataset.load

        if not partitions:
            raise DataSetError(f"No partitions found in `{self._path}`")

        return partitions

    def _save(self, data: Dict[str, Any]) -> None:
        for partition_id, partition_data in sorted(data.items()):
            kwargs = deepcopy(self._dataset_config)
            partition = self._partition_to_path(partition_id)
            # join the protocol back since tools like PySpark may rely on it
            kwargs[self._filepath_arg] = self._join_protocol(partition)
            dataset = self._dataset_type(**kwargs)  # type: ignore
            dataset.save(partition_data)
        self._invalidate_caches()

    def _describe(self) -> Dict[str, Any]:
        clean_dataset_config = ({
            k: v
            for k, v in self._dataset_config.items() if k != CREDENTIALS_KEY
        } if isinstance(self._dataset_config, dict) else self._dataset_config)
        return dict(
            path=self._path,
            dataset_type=self._dataset_type.__name__,
            dataset_config=clean_dataset_config,
        )

    def _invalidate_caches(self):
        self._partition_cache.clear()
        self._filesystem.invalidate_cache(self._normalized_path)

    def _exists(self) -> bool:
        return bool(self._list_partitions())

    def _release(self) -> None:
        super()._release()
        self._invalidate_caches()
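
_list_partitions above is memoised in a single-entry cache on the instance and explicitly cleared by _invalidate_caches() after saves and releases. A minimal sketch of that memoise-then-invalidate pattern with an invented class:

import operator

from cachetools import Cache, cachedmethod


class Listing:
    def __init__(self):
        self._partition_cache = Cache(maxsize=1)   # holds the single memoised result
        self.scans = 0

    @cachedmethod(operator.attrgetter("_partition_cache"))
    def list_partitions(self):
        self.scans += 1                            # stand-in for an expensive find()
        return ["part-0", "part-1"]

    def invalidate(self):
        self._partition_cache.clear()              # next call recomputes


listing = Listing()
listing.list_partitions()
listing.list_partitions()
assert listing.scans == 1        # second call came from the cache
listing.invalidate()
listing.list_partitions()
assert listing.scans == 2        # cleared, so it scanned again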
Example #16
    def __init__(self,
                 X,
                 Y,
                 R,
                 target_sparsity=0.01,
                 gamma0_v=1.0,
                 lambda_params=(1e-6, 1e-6),
                 nu_params=(1e-6, 1e-6),
                 xi=0.999999,
                 xi_prior_shape=(1, 1),
                 check_finite=True,
                 min_eigenval=0,
                 jitter=1e-6):
        """The Probit model used for modeling Sparse Regression using a Gaussian field. :cite:`Engelhardt2014`.

        .. math::

            y \\mid X, \\beta, \\beta_0, \\nu \\propto \\mathcal{N}(\\beta_0 1_n + X \\beta, \\nu^{-1} I_n)

        Parameters
        ----------
        X : ndarray
           The predictor matrix of real numbers, n x p in size, where n is the no. of samples (genotypes) and p is the
           no. of features (SNPs).
        Y : ndarray
           The response vector of real numbers, n x 1 in size, with each value representing the phenotype value for the
           sample.
        R : ndarray
           The covariance matrix for the SNPs, p x p in size. The matrix may not be positive-definite, but is converted
           to one internally.
        target_sparsity : float
            The proportion of included predictors. For example, a value of 0.01 indicates that around 1% of total SNPs
            are expected to be included in our model. This value affects the probit threshold gamma_0 of the model.
        gamma0_v : float
            Variance of the probit threshold gamma_0
        lambda_params : tuple
            Shape parameter and Inverse-scale parameter of the gamma prior placed on the model parameter lambda, where
            lambda is the inverse squared global scale parameter for the regression weights.
        nu_params : tuple
            Shape parameter and Inverse-scale parameter of the gamma prior placed on the model parameter nu, where nu
            is the residual precision.
        xi : float
            The shrinkage constant in the interval [0,1] to regularize the covariance matrix towards the identity
            matrix. This ensures that the covariance matrix is positive definite.
            A larger xi value biases our estimate towards the supplied R matrix, a lower value biases it towards the
            identity matrix.
            If None, then xi is sampled from a beta distribution with shape parameters specified by the tuple
            xi_prior_shape.
        xi_prior_shape : tuple
            Shape parameters of the beta prior placed on the model parameter xi, specified as a 2-tuple of real values.
            This argument is ignored and xi is not sampled, if it is specified explicitly using the xi parameter.
        check_finite : bool
            Whether to check that the input matrices contain only finite numbers. Disabling may give a performance gain,
            but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs.
            This parameter is passed on to several linear algebra functions in scipy internally.
        min_eigenval : float
            Minimum Eigenvalue we can accept in the covariance matrix. Any eigenvalues encountered below this threshold
            are set to zero, and the resulting covariance matrix normalized to give ones on the diagonal.
        jitter : float
            A small value to add to the diagonals of the covariance matrix to avoid conditioning issues.
        """

        self.X = X
        self.Y = Y
        self.R = Mvn(cov=R, min_eigenval=min_eigenval, jitter=jitter)

        self.N, self.P = self.X.shape

        self.nu_a, self.nu_b = nu_params

        self.check_finite = check_finite

        if xi is None:
            self.sample_xi = True
            self._xi_distribution = beta(*xi_prior_shape)
            self.xi = self._xi_distribution.mean()
        else:
            self.sample_xi = False
            self.xi = xi

        # Initialize scalar model distributions and the parameter values to their prior means.
        self._gamma0_distribution = norm(loc=norm.ppf(1.0 - target_sparsity),
                                         scale=gamma0_v)
        self.gamma0 = self._gamma0_distribution.mean()
        self._lambda_distribution = gamma(lambda_params[0],
                                          scale=1. / lambda_params[1])
        self.lamb = self._lambda_distribution.mean()
        self._nu_distribution = gamma(self.nu_a, scale=1. / self.nu_b)
        self.nu = self._nu_distribution.mean()

        # Cache for holding probit prior distributions (multivariate normal distributions with 0 mean and known
        # covariance, possibly adjusted by a shrinkage factor xi expressing our confidence in the covariance).
        # A single iteration of MCMC calls on many computations on this distribution, so caching improves performance
        # significantly. A small cache size works just as well as a large one,
        # because the most recently used distribution tends to be used repeatedly in a single MCMC step.
        self._probit_cache = Cache(maxsize=4)

        # A cache used to hold the marginal PPI (Posterior Probability of Inclusion) distributions
        # p(y | X, gamma, gamma_0, nu, lambda) ~ Normal(..)
        # A small cache size works just as well as a large one, because the most recently used distribution tends to
        # be used repeatedly in a single MCMC step.
        self._ppi_cache = Cache(maxsize=8)

        # Initialize the sparsity function by generating a random variate from the model's probit distribution
        self.gamma = self.probit_distribution(self.xi).rvs()
Example #17
    def __init__(self, config, hass_api=None):
        # Defaults to UTC now() - every interval
        self.plant_update = {}
        self.hass_api = hass_api
        self.logger = logger
        self.config = config
        self.cache = Cache(maxsize=10)
        self.start_total_energy = {}
        self.pasttime = datetime.combine(date.today(),
                                         datetime.min.time()).timestamp()
        self.dsmr_access = threading.Condition(threading.Lock())

        if self.config.get('default', 'debug', fallback=False):
            logger.setLevel(logging.DEBUG)

        if not self._init_data_fields():
            sys.exit(1)
        # init energy cache
        # total_energy_cache.json
        self.persistant_cache_file = self.config.get(
            'default',
            'persistant_cache_file',
            fallback='./persistant_cache.json')
        self._load_persistant_cache()
        # read data_fields
        try:
            with open(self.data_config_file) as json_file_config:
                self.config.data_field_config = json.load(json_file_config)
        except Exception as e:
            hybridlogger.ha_log(
                self.logger, self.hass_api, "ERROR",
                f"Error reading configuration file (data_fields.json). Exiting!. Error: {e.args}"
            )
            raise e

        # read attributes
        if not self._read_attributes():
            sys.exit(1)

        self.every = self._get_interval()
        self.interval_aggregated = self._get_interval_aggregated()

        # Make sure we check for a recent update first
        self.last_update_time = datetime.now(
            timezone.utc) - timedelta(seconds=self.interval_aggregated)

        # Initialize plugin paths
        sys.path.append(self.__expand_path('plugin_output'))
        sys.path.append(self.__expand_path('plugin_client'))
        sys.path.append(self.__expand_path('plugin_localproxy'))

        self.city = self.config.get('default', 'city', fallback='Amsterdam')
        try:
            self.dl = daylight(self.city)
        except Exception as e:
            hybridlogger.ha_log(
                self.logger, self.hass_api, "ERROR",
                f"City '{self.city}' not recognized. Error: {e}")
            sys.exit(1)

        # Initialize client
        self._init_client()
        # Initialize output plugins
        self._init_output_plugins()

        self.omnik_api_level = 0

        # Init dsmr
        self._init_dsmr()
Example #18
#stock data provider
from cachetools import Cache, keys, cached
import pandas as pd

__local_cache = Cache(maxsize=42)


def __hash_key_for_2(data1, *args):
    return keys.hashkey(id(data1), *args)


@cached(cache=__local_cache, key=__hash_key_for_2)
def create_dataframe(all_data, name='close'):
    trading_data = {}
    for data in all_data:
        trading_data[data.stock_id] = data.data_frame[name]

    panel = pd.DataFrame(data=trading_data)

    return panel.fillna(method='pad')


def filter_dataframe(data, start_date=None, end_date=None):
    if start_date:
        start_date = pd.to_datetime(start_date)
    if end_date:
        end_date = pd.to_datetime(end_date)

    if not start_date and not end_date:
        return data
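
__hash_key_for_2 above derives the cache key from id(data1) so the unhashable first argument never needs to be hashed itself. A hedged standalone sketch of that custom-key trick (summarize and its toy data are illustrative, and keying on id() assumes the object outlives the cache entry):

from cachetools import Cache, cached
from cachetools.keys import hashkey

_local_cache = Cache(maxsize=42)


def _key_by_identity(data, *args):
    # Lists/DataFrames are unhashable, so key on the object's identity instead
    return hashkey(id(data), *args)


@cached(cache=_local_cache, key=_key_by_identity)
def summarize(data, column="close"):
    return sum(row[column] for row in data)


rows = [{"close": 1.0}, {"close": 2.5}]
assert summarize(rows, "close") == 3.5
assert summarize(rows, "close") == 3.5    # second call served from the cache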
Example #19
# coding: utf-8

import re
from pkgutil import get_data

import sqlparse
from cachetools import cached, Cache

from synchromoodle.dbutils import Database

__statements_cache = Cache(maxsize=100)


def init(db: Database):
    run_script('data/ddl.sql', db)


def reset(db: Database):
    run_script('data/ddl.sql', db)


@cached(__statements_cache)
def _get_statements(path: str):
    script_data = str(get_data('test', path), 'utf8')
    cleaned_script_data = re.sub(r'/\*.+?\*/;\n', "", script_data, flags=re.MULTILINE)
    statements = sqlparse.split(cleaned_script_data)
    return statements


def run_script(script: str, db: Database, connect=True):
    if connect:
Example #20
    def close_issue(self, issue):
        headers = {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.v3+json'
        }
        params = {'state': 'closed'}
        response = requests.patch(
            f'https://api.github.com/repos/{self.user}/{self.repo}/issues/{issue}',
            headers=headers,
            json=params)
        response.raise_for_status()
        data = response.json()
        logging.info('response from github: %r', data)


@cached(Cache(maxsize=1))
def github_app_key():
    with open(os.getenv('GITHUB_APP_KEY'), 'rb') as keyfile:
        return load_pem_private_key(keyfile.read(), password=None)


@cached(TTLCache(maxsize=1, ttl=600))
def github_jwt() -> bytes:
    now = datetime.datetime.now(datetime.timezone.utc)
    delta_before = datetime.timedelta(0, 0, 0, 0, -1, 0, 0)  # 1 minute
    delta_after = datetime.timedelta(0, 0, 0, 0, 10, 0, 0)  # 10 minutes
    key = github_app_key()
    payload = {
        'exp': int((now + delta_after).timestamp()),
        'iat': int((now + delta_before).timestamp()),
        'iss': os.getenv('GITHUB_APP_ID')
Example #21
class AbstractVersionedDataSet(AbstractDataSet, abc.ABC):
    """
    ``AbstractVersionedDataSet`` is the base class for all versioned data set
    implementations. All data sets that implement versioning should extend this
    abstract class and implement the methods marked as abstract.

    Example:
    ::

        >>> from pathlib import Path, PurePosixPath
        >>> import pandas as pd
        >>> from kedro.io import AbstractVersionedDataSet
        >>>
        >>>
        >>> class MyOwnDataSet(AbstractVersionedDataSet):
        >>>     def __init__(self, filepath, version, param1, param2=True):
        >>>         super().__init__(PurePosixPath(filepath), version)
        >>>         self._param1 = param1
        >>>         self._param2 = param2
        >>>
        >>>     def _load(self) -> pd.DataFrame:
        >>>         load_path = self._get_load_path()
        >>>         return pd.read_csv(load_path)
        >>>
        >>>     def _save(self, df: pd.DataFrame) -> None:
        >>>         save_path = self._get_save_path()
        >>>         df.to_csv(str(save_path))
        >>>
        >>>     def _exists(self) -> bool:
        >>>         path = self._get_load_path()
        >>>         return Path(path.as_posix()).exists()
        >>>
        >>>     def _describe(self):
        >>>         return dict(version=self._version, param1=self._param1, param2=self._param2)

    Example catalog.yml specification:
    ::

        my_dataset:
            type: <path-to-my-own-dataset>.MyOwnDataSet
            filepath: data/01_raw/my_data.csv
            versioned: true
            param1: <param1-value> # param1 is a required argument
            # param2 will be True by default
    """

    def __init__(
        self,
        filepath: PurePosixPath,
        version: Optional[Version],
        exists_function: Callable[[str], bool] = None,
        glob_function: Callable[[str], List[str]] = None,
    ):
        """Creates a new instance of ``AbstractVersionedDataSet``.

        Args:
            filepath: Filepath in POSIX format to a file.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            exists_function: Function that is used for determining whether
                a path exists in a filesystem.
            glob_function: Function that is used for finding all paths
                in a filesystem, which match a given pattern.
        """
        self._filepath = filepath
        self._version = version
        self._exists_function = exists_function or _local_exists
        self._glob_function = glob_function or iglob
        # 1 entry for load version, 1 for save version
        self._version_cache = Cache(maxsize=2)

    # 'key' is set to prevent cache key overlapping for load and save:
    # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod
    @cachedmethod(cache=attrgetter("_version_cache"), key=partial(hashkey, "load"))
    def _fetch_latest_load_version(self) -> str:
        # When load version is unpinned, fetch the most recent existing
        # version from the given path.
        pattern = str(self._get_versioned_path("*"))
        version_paths = sorted(self._glob_function(pattern), reverse=True)
        most_recent = next(
            (path for path in version_paths if self._exists_function(path)), None
        )

        if not most_recent:
            raise VersionNotFoundError(f"Did not find any versions for {self}")

        return PurePath(most_recent).parent.name

    # 'key' is set to prevent cache key overlapping for load and save:
    # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod
    @cachedmethod(cache=attrgetter("_version_cache"), key=partial(hashkey, "save"))
    def _fetch_latest_save_version(self) -> str:  # pylint: disable=no-self-use
        """Generate and cache the current save version"""
        return generate_timestamp()

    def resolve_load_version(self) -> Optional[str]:
        """Compute the version the dataset should be loaded with."""
        if not self._version:
            return None
        if self._version.load:
            return self._version.load
        return self._fetch_latest_load_version()

    def _get_load_path(self) -> PurePosixPath:
        if not self._version:
            # When versioning is disabled, load from original filepath
            return self._filepath

        load_version = self.resolve_load_version()
        return self._get_versioned_path(load_version)  # type: ignore

    def resolve_save_version(self) -> Optional[str]:
        """Compute the version the dataset should be saved with."""
        if not self._version:
            return None
        if self._version.save:
            return self._version.save
        return self._fetch_latest_save_version()

    def _get_save_path(self) -> PurePosixPath:
        if not self._version:
            # When versioning is disabled, return original filepath
            return self._filepath

        save_version = self.resolve_save_version()
        versioned_path = self._get_versioned_path(save_version)  # type: ignore

        if self._exists_function(str(versioned_path)):
            raise DataSetError(
                f"Save path `{versioned_path}` for {str(self)} must not exist if "
                f"versioning is enabled."
            )

        return versioned_path

    def _get_versioned_path(self, version: str) -> PurePosixPath:
        return self._filepath / version / self._filepath.name

    def load(self) -> Any:
        self.resolve_load_version()  # Make sure last load version is set
        return super().load()

    def save(self, data: Any) -> None:
        self._version_cache.clear()
        save_version = self.resolve_save_version()  # Make sure last save version is set
        try:
            super().save(data)
        except (FileNotFoundError, NotADirectoryError) as err:
            # FileNotFoundError raised in Win, NotADirectoryError raised in Unix
            _default_version = "YYYY-MM-DDThh.mm.ss.sssZ"
            raise DataSetError(
                f"Cannot save versioned dataset `{self._filepath.name}` to "
                f"`{self._filepath.parent.as_posix()}` because a file with the same "
                f"name already exists in the directory. This is likely because "
                f"versioning was enabled on a dataset already saved previously. Either "
                f"remove `{self._filepath.name}` from the directory or manually "
                f"convert it into a versioned dataset by placing it in a versioned "
                f"directory (e.g. with default versioning format "
                f"`{self._filepath.as_posix()}/{_default_version}/{self._filepath.name}"
                f"`)."
            ) from err

        load_version = self.resolve_load_version()
        if load_version != save_version:
            warnings.warn(
                _CONSISTENCY_WARNING.format(save_version, load_version, str(self))
            )

    def exists(self) -> bool:
        """Checks whether a data set's output already exists by calling
        the provided _exists() method.

        Returns:
            Flag indicating whether the output already exists.

        Raises:
            DataSetError: when underlying exists method raises error.

        """
        self._logger.debug("Checking whether target of %s exists", str(self))
        try:
            return self._exists()
        except VersionNotFoundError:
            return False
        except Exception as exc:  # SKIP_IF_NO_SPARK
            message = (
                f"Failed during exists check for data set {str(self)}.\n{str(exc)}"
            )
            raise DataSetError(message) from exc

    def _release(self) -> None:
        super()._release()
        self._version_cache.clear()
Example #22
 def cache(self, maxsize, missing=None, getsizeof=None):
     return Cache(maxsize, missing=missing, getsizeof=getsizeof)
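
One caution on the snippet above: the missing= constructor argument belonged to early cachetools releases and is not accepted by current ones. A hedged sketch of one way to get roughly similar behaviour today, by overriding __missing__ in a subclass (not claimed to be an exact drop-in):

from cachetools import Cache


class DefaultingCache(Cache):
    """Hypothetical replacement: compute absent values via __missing__ instead of
    the removed missing= constructor argument."""

    def __init__(self, maxsize, missing, getsizeof=None):
        super().__init__(maxsize, getsizeof=getsizeof)
        self._missing = missing

    def __missing__(self, key):
        value = self._missing(key)
        self[key] = value          # cache the computed value before returning it
        return value


c = DefaultingCache(maxsize=8, missing=str.upper)
assert c["spam"] == "SPAM"
assert "spam" in c                 # subsequent lookups hit the cache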
Example #23
class CredentialProvider:
    credentials = None
    cache = Cache(maxsize=10)

    def __init__(self, account='default', credentials=None):
        self.account = account
        self.read_credentials = [
            self.from_env, self.from_secrets, self.read_config
        ]
        if credentials:
            self.credentials = self.Config(**credentials)
            missing = self.credentials.check_config()
            if len(missing):
                raise MissingCredentials(
                    f'The following configuration parameters are missing: {missing}'
                )
        else:
            self.load_credentials()

    def load_credentials(self):
        for read_method in self.read_credentials:
            if read_method():
                return True

    def from_secrets(self):
        if not os.environ.get('SP_API_AWS_SECRET_ID', None):
            return
        try:
            client = boto3.client('secretsmanager')
            response = client.get_secret_value(
                SecretId=os.environ.get('SP_API_AWS_SECRET_ID'))
            secret = json.loads(response.get('SecretString'))
            account_data = dict(
                refresh_token=secret.get('SP_API_REFRESH_TOKEN'),
                lwa_app_id=secret.get('LWA_APP_ID'),
                lwa_client_secret=secret.get('LWA_CLIENT_SECRET'),
                aws_secret_key=secret.get('SP_API_SECRET_KEY'),
                aws_access_key=secret.get('SP_API_ACCESS_KEY'),
                role_arn=secret.get('SP_API_ROLE_ARN'))
            self.cache['account_data'] = json.dumps(account_data)
        except ClientError as client_error:
            return
        else:
            self.credentials = self.Config(**account_data)
            return len(self.credentials.check_config()) == 0

    def from_env(self):
        try:
            account_data = json.loads(self.cache['account_data'])
        except KeyError:
            account_data = dict(
                refresh_token=self._get_env('SP_API_REFRESH_TOKEN'),
                lwa_app_id=self._get_env('LWA_APP_ID'),
                lwa_client_secret=self._get_env('LWA_CLIENT_SECRET'),
                aws_secret_key=self._get_env('SP_API_SECRET_KEY'),
                aws_access_key=self._get_env('SP_API_ACCESS_KEY'),
                role_arn=self._get_env('SP_API_ROLE_ARN'))
        self.credentials = self.Config(**account_data)
        return len(self.credentials.check_config()) == 0

    def _get_env(self, key):
        return os.environ.get(f'{key}_{self.account}', os.environ.get(key))

    def read_config(self):
        try:
            config = confuse.Configuration('python-sp-api')
            config_filename = os.path.join(config.config_dir(),
                                           'credentials.yml')
            config.set_file(config_filename)
            account_data = config[self.account].get()
            self.credentials = self.Config(**account_data)
            missing = self.credentials.check_config()
            if len(missing):
                raise MissingCredentials(
                    f'The following configuration parameters are missing: {missing}'
                )
        except confuse.exceptions.NotFoundError:
            raise MissingCredentials(
                f'The account {self.account} was not setup in your configuration file.'
            )
        except confuse.exceptions.ConfigReadError:
            raise MissingCredentials(
                f'Neither environment variables nor a config file were found. '
                f'Please set the correct variables, or use a config file (credentials.yml). '
                f'See https://confuse.readthedocs.io/en/latest/usage.html#search-paths for search paths.'
            )
        else:
            return True

    class Config:
        def __init__(self,
                     refresh_token,
                     lwa_app_id,
                     lwa_client_secret,
                     aws_access_key,
                     aws_secret_key,
                     role_arn,
                     use_instance_profile=None):
            self.refresh_token = refresh_token
            self.lwa_app_id = lwa_app_id
            self.lwa_client_secret = lwa_client_secret
            self.aws_access_key = aws_access_key
            self.aws_secret_key = aws_secret_key
            self.role_arn = role_arn

        def check_config(self):
            errors = []
            for k, v in self.__dict__.items():
                if not v and k != 'refresh_token':
                    errors.append(k)
            return errors
Example #24
import os
from flask import Flask, redirect, render_template, request
import urllib
import datetime
import json
import ibm_db
import geocoder
import geopy.distance
from config import *
import time
from cachetools import cached, Cache
import pandas as pd

app = Flask(__name__)
cache = Cache(maxsize=1000000)


@cached(cache)
def load_csv(fname, ftype):
    df = pd.read_csv('./static/{}.{}'.format(fname, ftype))
    return df


@cached(cache)
def save_file(fname):
    f = open('./static/{}.txt'.format(fname), 'w')
    return f


cache_sp = load_csv('sp', 'csv')
cache_pc = load_csv('pc', 'csv')
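
Both load_csv and save_file above share one Cache with the default hashkey key, so two different functions called with identical arguments would collide on the same entry. A hedged sketch of one way to keep a shared cache safe, by prefixing each function's keys (the function bodies are stand-ins):

from functools import partial

from cachetools import Cache, cached
from cachetools.keys import hashkey

shared_cache = Cache(maxsize=1024)


@cached(shared_cache, key=partial(hashkey, "load_csv"))
def load_csv(fname, ftype):
    return f"dataframe<{fname}.{ftype}>"     # stand-in for pd.read_csv(...)


@cached(shared_cache, key=partial(hashkey, "describe"))
def describe(fname, ftype):
    return f"summary<{fname}.{ftype}>"       # same arguments, different key space


assert load_csv("sp", "csv") != describe("sp", "csv")
assert len(shared_cache) == 2                # both entries coexist despite equal args

Without the prefixes, describe("sp", "csv") would return the value cached for load_csv("sp", "csv").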
Example #25
 def __init__(self, maxsize, getsizeof=None):
     Cache.__init__(self, maxsize, getsizeof=getsizeof)
     self.__order = collections.OrderedDict()
Example #27
    def __init__(  # pylint: disable=too-many-arguments
        self,
        path: str,
        dataset: Union[str, Type[AbstractDataSet], Dict[str, Any]],
        filepath_arg: str = "filepath",
        filename_suffix: str = "",
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        fs_args: Dict[str, Any] = None,
    ):
        """Creates a new instance of ``PartitionedDataSet``.

        Args:
            path: Path to the folder containing partitioned data.
                If path starts with the protocol (e.g., ``s3://``) then the
                corresponding ``fsspec`` concrete filesystem implementation will
                be used. If protocol is not specified,
                ``fsspec.implementations.local.LocalFileSystem`` will be used.
                **Note:** Some concrete implementations are bundled with ``fsspec``,
                while others (like ``s3`` or ``gcs``) must be installed separately
                prior to usage of the ``PartitionedDataSet``.
            dataset: Underlying dataset definition. This is used to instantiate
                the dataset for each file located inside the ``path``.
                Accepted formats are:
                a) object of a class that inherits from ``AbstractDataSet``
                b) a string representing a fully qualified class name to such class
                c) a dictionary with ``type`` key pointing to a string from b),
                other keys are passed to the Dataset initializer.
                Credentials for the dataset can be explicitly specified in
                this configuration.
            filepath_arg: Underlying dataset initializer argument that will
                contain a path to each corresponding partition file.
                If unspecified, defaults to "filepath".
            filename_suffix: If specified, only partitions that end with this
                string will be processed.
            credentials: Protocol-specific options that will be passed to
                ``fsspec.filesystem``
                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem
                and the dataset initializer. If the dataset config contains
                explicit credentials spec, then such spec will take precedence.
                All possible credentials management scenarios are documented here:
                https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials
            load_args: Keyword arguments to be passed into ``find()`` method of
                the filesystem implementation.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``)

        Raises:
            DataSetError: If versioning is enabled for the underlying dataset.
        """
        # pylint: disable=import-outside-toplevel
        from fsspec.utils import infer_storage_options  # for performance reasons

        super().__init__()

        self._path = path
        self._filename_suffix = filename_suffix
        self._protocol = infer_storage_options(self._path)["protocol"]
        self._partition_cache = Cache(maxsize=1)

        dataset = dataset if isinstance(dataset, dict) else {"type": dataset}
        self._dataset_type, self._dataset_config = parse_dataset_definition(
            dataset)
        if VERSION_KEY in self._dataset_config:
            raise DataSetError(
                "`{}` does not support versioning of the underlying dataset. "
                "Please remove `{}` flag from the dataset definition.".format(
                    self.__class__.__name__, VERSIONED_FLAG_KEY))

        if credentials:
            if CREDENTIALS_KEY in self._dataset_config:
                self._logger.warning(
                    KEY_PROPAGATION_WARNING,
                    {
                        "keys": CREDENTIALS_KEY,
                        "target": "underlying dataset"
                    },
                )
            else:
                self._dataset_config[CREDENTIALS_KEY] = deepcopy(credentials)

        self._credentials = deepcopy(credentials) or {}

        self._fs_args = deepcopy(fs_args) or {}
        if self._fs_args:
            if "fs_args" in self._dataset_config:
                self._logger.warning(
                    KEY_PROPAGATION_WARNING,
                    {
                        "keys": "filesystem arguments",
                        "target": "underlying dataset"
                    },
                )
            else:
                self._dataset_config["fs_args"] = deepcopy(self._fs_args)

        self._filepath_arg = filepath_arg
        if self._filepath_arg in self._dataset_config:
            warn(
                "`{}` key must not be specified in the dataset definition as it "
                "will be overwritten by partition path".format(
                    self._filepath_arg))

        self._load_args = deepcopy(load_args) or {}
        self._sep = self._filesystem.sep
        # since some filesystem implementations may implement a global cache
        self._invalidate_caches()
Example #28
import tensorflow as tf

from cachetools import Cache

from ..meta import (
    MetaSymbol,
    MetaSymbolType,
    MetaOp,
    MetaVariable,
    MetaReificationError,
    meta_reify_iter,
    _metatize,
    metatize,
)

from .. import meta

from ..utils import HashableNDArray

tf_metatize_cache = Cache(50)


class MetaOpDefLibrary(object):
    """A singleton-like object that holds correspondences between TF Python API functions and the `OpDef`s they construct.

    It provides a map of `OpDef` names (lower-cased) to the Python API
    functions in `tensorflow.raw_ops`, as well as `inspect.Signature` objects
    for said functions so that default values and lists of arguments (keywords
    included) can be more easily used.

    """

    lower_op_name_to_raw = {
        op_name.lower(): op_name
        for op_name in dir(tf.raw_ops)
Example #29
import logging
from functools import partial

import spacy
from cachetools import cached, Cache
from cachetools.keys import hashkey

import textacy
from textacy.compat import PY2, bytes_type

logger = logging.getLogger(__name__)

DEFAULT_DATA_DIR = textacy.__resources_dir__

_CACHE = {}
"""dict: key-value store used to cache datasets and such in memory"""


# TODO: maybe don't actually cache this -- it takes up a lot of RAM
# but is indeed a pain to load
@cached(Cache(1), key=partial(hashkey, 'spacy'))
def load_spacy(name, **kwargs):
    """
    Load a language-specific spaCy pipeline (collection of data, models, and
    resources) for tokenizing, tagging, parsing, etc. text; the most recent
    package loaded is cached.

    Args:
        name (str): standard 2-letter language abbreviation for a language;
            currently, spaCy supports English ('en') and German ('de')
        **kwargs: keyword arguments passed to :func:`spacy.load`; see the
            `spaCy docs <https://spacy.io/docs#english>`_ for details

            * via (str): non-default directory from which to load package data
            * vocab
            * tokenizer
Example #30
import argparse
import os

import tensorflow as tf
from cachetools import cached, Cache

from ssd_detector.trainer import create_session, detection_model, InputValData
from ssd_detector.toolbox.coco_metrics_eval import calc_coco_metrics
from ssd_detector.toolbox.summary import group_ssd_heads, write_histogram_2d
from tfutils.helpers import draw_bboxes, load_module


def parse_args():
    parser = argparse.ArgumentParser(
        description='Perform evaluation of a detection model')
    parser.add_argument('path_to_config', help='Path to a config.py')
    return parser.parse_args()


@cached(Cache(100))
def load_coco(path):
    from pycocotools.coco import COCO
    return COCO(path)


# pylint: disable=too-many-locals,too-many-arguments
def eval_dataset(annotations,
                 config,
                 eval_name,
                 checkpoint_path,
                 session_config,
                 sample_images=None,
                 dump_priors_info=True):
    log_dir = os.path.join(config.MODEL_DIR, 'eval_' + eval_name)
    run_config = tf.estimator.RunConfig(session_config=session_config)
Example #31
 def setdefault(self, *args, **kwargs):
     with self.__timer:
         return Cache.setdefault(self, *args, **kwargs)
Example #32
 def get(self, key):
     """
     Returns the value of the item with the specified key without updating
     the cache eviction algorithm.
     """
     return Cache.__getitem__(self, key)
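
A short sketch of what such a peek buys you on an LRUCache: going through the base-class Cache.__getitem__ reads the value without refreshing the entry's recency, whereas normal indexing would have marked it as recently used:

from cachetools import Cache, LRUCache

lru = LRUCache(maxsize=2)
lru["a"] = 1
lru["b"] = 2

# Peek at "a" without marking it as recently used
assert Cache.__getitem__(lru, "a") == 1

lru["c"] = 3                  # cache is full, so the least recently used key is evicted
assert "a" not in lru         # "a" was evicted: the peek did not refresh it
assert "b" in lru and "c" in lru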