Example #1
    def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict) -> None:
        read_cache_directory = Config.get_from_dict(module_configuration, 'readCache.directory', None, types=str)
        read_cache_maximum_size = Config.get_from_dict(module_configuration, 'readCache.maximumSize', None, types=int)
        read_cache_shards = Config.get_from_dict(module_configuration, 'readCache.shards', None, types=int)

        if read_cache_directory and read_cache_maximum_size:
            os.makedirs(read_cache_directory, exist_ok=True)
            try:
                self._read_cache = FanoutCache(
                    read_cache_directory,
                    size_limit=read_cache_maximum_size,
                    shards=read_cache_shards,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                logger.warning('Unable to enable disk based read caching. Continuing without it.')
                self._read_cache = None
            else:
                logger.debug('Disk based read caching instantiated (cache size {}, shards {}).'.format(
                    read_cache_maximum_size, read_cache_shards))
        else:
            self._read_cache = None
        self._use_read_cache = True

        # Start reader and writer threads after the disk cache is created, so that they see it.
        super().__init__(config=config, name=name, storage_id=storage_id, module_configuration=module_configuration)
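
A note on the statistics=1 setting used above: it turns on hit/miss tracking, which can later be read back with cache.stats(). Below is a minimal sketch of that round trip; the directory name and keys are illustrative and not taken from the example.

from diskcache import FanoutCache

cache = FanoutCache('tmp/read-cache-demo', statistics=1)
cache.set('block-0', b'\x00' * 4096)
cache.get('block-0')      # hit, counted because statistics=1
cache.get('missing-key')  # miss
hits, misses = cache.stats()  # returns a (hits, misses) tuple
print(hits, misses)
cache.close()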
Example #2
    def __init__(self, *args, directory=None, shards=8, **kwargs: Any) -> None:
        self.__is_init = False
        self._set_locks: Dict[str, asyncio.Lock] = {}
        self._sharded = shards > 1
        if not self._sharded:
            self._cache = Cache(directory=directory, **kwargs)
        else:
            self._cache = FanoutCache(directory=directory, shards=shards, **kwargs)
        super().__init__()
Example #3
def worker(queue, eviction_policy, processes, threads):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = FanoutCache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if (action == 'get' and processes == 1 and threads == 1
                and EXPIRE is None):
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)

    cache.close()
Example #4
def cache_pop(key, timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''
    This pops (removes and returns) the value stored for the key in the cache.

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    val = cache.pop(key)
    cache.close()

    return val
Example #5
def cache_flush(timeout_seconds=0.3, cache_dirname='/tmp/lccserver-cache'):
    '''
    This removes all keys from the cache.

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    items_removed = cache.clear()
    cache.close()

    return items_removed
Example #6
    def __init__(self):
        """

        Function used to initialise the data manager.

        ** Modifications **

            1. Modify the '_transmission_keys' set to specify which values should be transmitted to each component.

        """

        # Declare dictionaries of data
        self._surface = FanoutCache(path.join("cache", "surface"), shards=2)
        self._arduino_T = FanoutCache(path.join("cache", "arduino_t"),
                                      shards=2)
        self._arduino_A = FanoutCache(path.join("cache", "arduino_a"),
                                      shards=2)
        self._arduino_M = FanoutCache(path.join("cache", "arduino_m"),
                                      shards=2)
        self._arduino_I = FanoutCache(path.join("cache", "arduino_i"),
                                      shards=2)

        # Create a dictionary mapping each index to corresponding location
        self._data = {
            SURFACE: self._surface,
            ARDUINO_T: self._arduino_T,
            ARDUINO_A: self._arduino_A,
            ARDUINO_M: self._arduino_M,
            ARDUINO_I: self._arduino_I
        }

        # Create a dictionary mapping each index to a set of networking keys
        self._transmission_keys = {
            SURFACE: {
                "status_T", "status_A", "status_M", "status_I", "error_T",
                "error_A", "error_M", "error_I", "Sen_IMU_X", "Sen_IMU_Y",
                "Sen_IMU_Z", "Sen_IMU_Temp"
            },
            ARDUINO_T: {
                "Thr_FP", "Thr_FS", "Thr_AP", "Thr_AS", "Thr_TFP", "Thr_TFS",
                "Thr_TAP", "Thr_TAS"
            },
            ARDUINO_A: {"Mot_R", "Mot_G", "Mot_F", "LED_M"},
            ARDUINO_M: {"Thr_M"},
            ARDUINO_I: set()
        }

        # Create a key to ID lookup for performance reasons
        self._keys_lookup = {
            v: k
            for k, values in self._transmission_keys.items() if k != SURFACE
            for v in values
        }
Example #7
def cache_delete(key,
                 timeout_seconds=0.3,
                 cache_dirname='/tmp/authnzerver-cache'):
    '''
    This deletes the specified key from the cache.

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    deleted = cache.delete(key)
    cache.close()

    return deleted
Example #8
class LocalDiskCache(CacheBase):
    def __init__(self,
                 path,
                 size_limit_bytes,
                 expected_row_size_bytes,
                 shards=6,
                 cleanup=False,
                 **settings):
        """LocalDiskCache is an adapter to a diskcache implementation.

        LocalDiskCache can be used by a petastorm Reader class to temporarily keep parts of the dataset on a local
        file system storage.

        :param path: Path where the dataset cache is being stored.
        :param size_limit_bytes: Maximal size of the disk-space to be used by cache. The size of the cache may actually
                                 grow somewhat above the size_limit_bytes, so the limit is not very strict.
        :param expected_row_size_bytes: Approximate size of a single row. This argument is used to perform a sanity
                                 check on the capacity of individual shards.
        :param shards: Cache can be sharded. Larger number of shards improve writing parallelism.
        :param cleanup: If set to True, the cache directory will be removed when the cleanup() method is called.
        :param settings: these parameters are passed through to the diskcache.Cache class.
                         For details, see: http://www.grantjenks.com/docs/diskcache/tutorial.html#settings
        """
        if size_limit_bytes / shards < 5 * expected_row_size_bytes:
            raise ValueError(
                'Condition \'size_limit_bytes / shards >= 5 * expected_row_size_bytes\' needs to hold, '
                'otherwise, newly added cached values might end up being immediately evicted.'
            )

        default_settings = {
            'size_limit': size_limit_bytes,
            'eviction_policy': 'least-recently-stored',
        }
        default_settings.update(settings)

        self._cleanup = cleanup
        self._path = path
        self._cache = FanoutCache(path, shards, **default_settings)

    def get(self, key, fill_cache_func):
        value = self._cache.get(key, default=None)
        if value is None:
            value = fill_cache_func()
            self._cache.set(key, value)

        return value

    def cleanup(self):
        if self._cleanup:
            shutil.rmtree(self._path)
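
A hedged usage sketch for LocalDiskCache as described by its docstring; the path, sizes, and loader function below are illustrative assumptions, not part of the original example.

def load_row():
    # stand-in for whatever expensive read would normally populate the cache
    return b'row-bytes'

cache = LocalDiskCache(path='/tmp/petastorm-cache',
                       size_limit_bytes=10 * 2**30,        # 10 GiB total
                       expected_row_size_bytes=1 * 2**20,  # ~1 MiB per row
                       shards=6,
                       cleanup=True)
row = cache.get('row-key', load_row)  # fills the cache on the first access
cache.cleanup()                       # removes the cache directory because cleanup=True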
Example #9
def worker(queue, eviction_policy):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = FanoutCache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if action == 'get' and PROCESSES == 1 and THREADS == 1 and EXPIRE is None:
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)

    cache.close()
Example #10
def cache_add(key, value,
              timeout_seconds=0.3,
              expires_seconds=None,
              cache_dirname='/tmp/authnzerver-cache'):
    '''
    This adds a key with the given value to the cache, but only if the key is
    not already present.

    '''

    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    added = cache.add(key, value, expire=expires_seconds)
    cache.close()

    return added
Example #11
    def __init__(self, model, cache_dir, chunk_size=1000, max_workers=None):
        self.model = model
        self.chunk_size = chunk_size
        self.cache_dir = cache_dir
        self.cache = FanoutCache(cache_dir, shards=24,
                                 size_limit=2**32)  # 4GB cache
        self.max_workers = max_workers
Example #12
    def init_cache(self,
                   shards: int = 0,
                   timeout: int = 0,
                   cache: FanoutCache = None):
        """
        Init or reset (NOT clear!) the underlying DiskCache based on parameters and object attributes.
        If an old cache already exists and the number of shards has changed, this COPIES the values into the new cache.
        Does NOT change the cache_path! That means the old cache will be REPLACED by the new one on disk.
        If you supply a new `diskcache.FanoutCache` object it MUST point to `self.cache_path`!
        Otherwise leave `cache` unset and this method will create a new `FanoutCache` for you.

        :param int shards: If `0` then use `self.shards`
        :param int timeout: If `0` then use `self.timeout`
        :param cache: If `None` then create new `FanoutCache`
        """

        shards = shards or self.shards
        timeout = timeout or self.timeout

        # Allows the calling function to supply a FanoutCache to REPLACE the one on disk
        if cache and cache.directory != self.cache_path:
            raise Exception(
                "Given DiskCache and DataStore paths do not match!")
        cache = cache or FanoutCache(self.cache_path,
                                     shards=shards,
                                     timeout=timeout,
                                     tag_index=self.tag_index)

        # Already have a cache and number of shards changed? We'll have to copy.
        if self.cache and shards != self.shards:
            DataStore.copy_cache(self.cache, cache)

        self.cache = cache
        self.close()
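
Example #12 relies on a DataStore.copy_cache helper that is not shown; a rough sketch of what such a copy could look like, using only the standard FanoutCache iteration and get/set API (the name and behaviour here are assumptions, not the original implementation):

from diskcache import FanoutCache

def copy_cache(old_cache: FanoutCache, new_cache: FanoutCache) -> None:
    # Iterating a FanoutCache yields its keys; copy each value across.
    for key in old_cache:
        new_cache.set(key, old_cache.get(key))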
Example #13
def stress_test(
    create=True,
    delete=True,
    eviction_policy=u'least-recently-stored',
    processes=1,
    threads=1,
):
    shutil.rmtree('tmp', ignore_errors=True)

    if processes == 1:
        # Use threads.
        func = threading.Thread
    else:
        func = mp.Process

    subprocs = [
        func(target=dispatch, args=(num, eviction_policy, processes, threads))
        for num in range(processes)
    ]

    if create:
        operations = list(all_ops())
        process_queue = [[] for _ in range(processes)]

        for index, ops in enumerate(operations):
            process_queue[index % processes].append(ops)

        for num in range(processes):
            with open('input-%s.pkl' % num, 'wb') as writer:
                pickle.dump(process_queue[num], writer, protocol=2)

    for process in subprocs:
        process.start()

    for process in subprocs:
        process.join()

    with FanoutCache('tmp') as cache:
        warnings.simplefilter('error')
        warnings.simplefilter('ignore', category=UnknownFileWarning)
        warnings.simplefilter('ignore', category=EmptyDirWarning)
        cache.check()

    timings = {'get': [], 'set': [], 'delete': [], 'self': 0.0}

    for num in range(processes):
        with open('output-%s.pkl' % num, 'rb') as reader:
            data = pickle.load(reader)
            for key in data:
                timings[key] += data[key]

    if delete:
        for num in range(processes):
            os.remove('input-%s.pkl' % num)
            os.remove('output-%s.pkl' % num)

    display(eviction_policy, timings)

    shutil.rmtree('tmp', ignore_errors=True)
Example #14
def getCache(scope_str):
    return FanoutCache(
        'data/cache/' + scope_str,
        disk=GzipDisk,
        shards=64,
        timeout=1,
        size_limit=3e11,
        # disk_min_file_size=2**20,
    )


# def disk_cache(base_path, memsize=2):
#     def disk_cache_decorator(f):
#         @functools.wraps(f)
#         def wrapper(*args, **kwargs):
#             args_str = repr(args) + repr(sorted(kwargs.items()))
#             file_str = hashlib.md5(args_str.encode('utf8')).hexdigest()
#
#             cache_path = os.path.join(base_path, f.__name__, file_str + '.pkl.gz')
#
#             if not os.path.exists(os.path.dirname(cache_path)):
#                 os.makedirs(os.path.dirname(cache_path), exist_ok=True)
#
#             if os.path.exists(cache_path):
#                 return pickle_loadgz(cache_path)
#             else:
#                 ret = f(*args, **kwargs)
#                 pickle_dumpgz(cache_path, ret)
#                 return ret
#
#         return wrapper
#
#     return disk_cache_decorator
#
#
# def pickle_dumpgz(file_path, obj):
#     log.debug("Writing {}".format(file_path))
#     with open(file_path, 'wb') as file_obj:
#         with gzip.GzipFile(mode='wb', compresslevel=1, fileobj=file_obj) as gz_file:
#             pickle.dump(obj, gz_file, pickle.HIGHEST_PROTOCOL)
#
#
# def pickle_loadgz(file_path):
#     log.debug("Reading {}".format(file_path))
#     with open(file_path, 'rb') as file_obj:
#         with gzip.GzipFile(mode='rb', fileobj=file_obj) as gz_file:
#             return pickle.load(gz_file)
#
#
# def dtpath(dt=None):
#     if dt is None:
#         dt = datetime.datetime.now()
#
#     return str(dt).rsplit('.', 1)[0].replace(' ', '--').replace(':', '.')
#
#
# def safepath(s):
#     s = s.replace(' ', '_')
#     return re.sub('[^A-Za-z0-9_.-]', '', s)
Example #15
def getCache(scope_str):
    return FanoutCache('../data-luna/cache/' + scope_str,
                       disk=GzipDisk,
                       shards=64,
                       timeout=1,
                       size_limit=3e11,
                       # disk_min_file_size=2**20,
                       )
Example #16
def make_matcher():
    lookup = get_title_lookup()
    cache = FanoutCache('title_cache', shards=24)
    matcher = layered_matcher([
        exact_matcher(lookup),
        title_matcher(lookup, punct_lookup(cache, lookup))
    ])
    return matcher
Example #17
def get_cache(value: str) -> FanoutCache:
    return FanoutCache(
        get_cache_path(value),
        size_limit=int(1e12),  # 1000 GB
        cull_limit=0,
        shards=8,
        sqlite_mmap_size=256,
    )
Example #18
def getCache(scope_str):
    return FanoutCache(
        "data-unversioned/cache/" + scope_str,
        disk=GzipDisk,
        shards=64,
        timeout=1,
        size_limit=3e11,
        # disk_min_file_size=2**20,
    )
Example #19
def getCache(scope_str):
    return FanoutCache(
        'data-unversioned/cache/' + scope_str,
        disk=GzipDisk,
        shards=128,
        timeout=1,
        size_limit=2e11,
        disk_min_file_size=2**20,
    )
Example #20
    def use_diskcache(self, **kwargs):
        """
        Use the PyPI package 'diskcache' as the main store.
        """
        try:
            from diskcache import FanoutCache as Cache
            # patch the class so it has an "exists" method
            Cache.exists = Cache.__contains__
            cachedir = kwargs.pop("cachedir", "cache")
            self._cache = Cache(cachedir, **kwargs)
            file_cachedir = kwargs.pop("file_cachedir", "files_cache")
            self._file_cache = Cache(file_cachedir, **kwargs)
            self._cache_engine = "diskcache"
            return
        except Exception as exc:
            warning = "diskcache connection failed with:\n\t" + str(exc)
            warning += "\nFalling back to in-memory cache."
            warnings.warn(warning)
            self.use_memory()
Example #21
    def __init__(self):
        """
        Constructor function used to initialise the data manager.

        Adjust the `shards` amount in the cache constructor to increase or decrease the amount of parallelism in
        data-related computations, as well as modify the `self._transmission_keys` set to specify which data should be
        networked to the middle-level software.
        """

        # Initialise the data cache
        self._data = FanoutCache(CACHE_PATH, shards=8)

        # Create a set of keys matching data which should be sent over the network
        self._transmission_keys = {
            "Thr_FP", "Thr_FS", "Thr_AP", "Thr_AS", "Thr_TFP", "Thr_TFS", "Thr_TAP", "Thr_TAS", "Mot_R", "Mot_G",
            "Mot_F"
        }

        # Initialise safeguard-related fields
        self._init_safeguards()
Example #22
class ExpiringCache:
    __slots__ = ["cache"]

    def __init__(self, cache_path):
        self.cache = FanoutCache(directory=cache_path, timeout=2, retry=True)

    def set(self, domain, domain_record, seconds_to_live, tag=None):
        self.cache.set(domain,
                       domain_record.json,
                       expire=seconds_to_live,
                       tag=tag)

    def get(self, key):
        return self.cache.get(key)

    def cache_dump(self, offset=0, limit=100):
        res = []
        for eachkey in self.cache:
            if offset != 0:
                offset -= 1
                continue
            if limit == 0:
                break
            val, expires = self.cache.get(eachkey, expire_time=True)
            if expires:
                exp_date = datetime.datetime.fromtimestamp(expires)
            else:
                exp_date = "Never"
            res.append({
                "domain": eachkey,
                "cache_value": val,
                "cache_expires": exp_date
            })
            limit -= 1
        return res

    def cache_get(self, key):
        val, expires = self.cache.get(key, expire_time=True)
        if not val:
            return {"text": "That domain is not in the database."}
        if expires:
            exp_date = datetime.datetime.fromtimestamp(expires)
        else:
            exp_date = "Never"
        res = {"domain": key, "cache_value": val, "cache_expires": exp_date}
        return res

    def cache_info(self):
        hits, miss = self.cache.stats()
        size = self.cache.volume()
        warnings = self.cache.check(fix=True)
        return {"hit": hits, "miss": miss, "size": size, "warnings": warnings}
Example #23
def cache_getrate(key,
                  timeout_seconds=0.3,
                  cache_dirname='/tmp/lccserver-cache'):
    '''This gets the rate of increment for the key by looking at the insertion
    time stored at the key itself and the number of times it was incremented in
    'key-counter'. The rate is then:

    key-counter_val / ((time_now - time_insertion) / 60.0)

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)

    # get the counter value
    counter_val = cache.get('%s-counter' % key, default=0)

    # get the time of insertion that we stored at the key itself
    time_of_insertion = cache.get(key, default=None)

    if time_of_insertion is not None:
        rate = (counter_val / (time.time() - time_of_insertion)) * 60.0
        insertion_isoformat = datetime.fromtimestamp(time_of_insertion).isoformat()
    else:
        rate = 0.0
        insertion_isoformat = None

    cache.close()
    return (rate, counter_val, insertion_isoformat)
Example #24
    def __init__(self,
                 vec_path,
                 cache_dir,
                 sep=' ',
                 chunk_size=1000,
                 max_workers=None):
        self.embedding = Embedding(vec_path, sep=sep)
        self.chunk_size = chunk_size
        self.vec_path = vec_path
        self.sep = sep
        self.cache = FanoutCache(cache_dir, shards=24, size_limit=2**32)
        self.cache_dir = cache_dir
        self.max_workers = max_workers
Example #25
def cache_decrement(key,
                    timeout_seconds=0.3,
                    cache_dirname='/tmp/lccserver-cache'):
    '''
    This decrements the counter for key.

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)
    decremented_val = cache.decr('%s-counter' % key)

    # if the counter hits zero, delete the key entirely from the cache
    if decremented_val == 0:
        cache.delete(key)
        cache.delete('%s-counter' % key)

    cache.close()
    return decremented_val
Example #26
def getCache(scope_str):
    # Built atop Cache is diskcache.FanoutCache which automatically
    # shards the underlying database. Sharding is the practice of
    # horizontally partitioning data. Here it is used to decrease
    # blocking writes. While readers and writers do not block each
    # other, writers block other writers. Therefore a shard for every
    # concurrent writer is suggested. This will depend on your scenario.
    # The default value is 8.
    # timeout sets a limit on how long to wait for database
    # transactions.
    # size_limit is used as the total size of the cache. The size limit
    # of individual cache shards is the total size divided by the number
    # of shards.
    return FanoutCache("data-unversioned/cache/" + scope_str,
                       disk=GzipDisk,
                       shards=64,
                       timeout=1,
                       size_limit=2e11)
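
As the comments above note, size_limit is the total for the cache and each shard gets an equal share of it. A small sketch of the arithmetic and a basic round trip (the directory name is illustrative, and GzipDisk is omitted):

from diskcache import FanoutCache

total_size_limit = 2e11                      # 200 GB total, as in getCache above
shards = 64
per_shard_limit = total_size_limit / shards  # roughly 3.1 GB per shard

cache = FanoutCache('tmp/sharded-demo', shards=shards, timeout=1,
                    size_limit=total_size_limit)
cache.set('key', b'value')
print(cache.get('key'))
cache.close()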
Example #27
    def initialize_cache(self, shards=None, timeout=1, queue=False):
        self.reset_cache()

        # Create a temporary directory for the cache
        self.cache_directory = mkdtemp()

        # Create a queue?
        if queue:
            self.cache = Deque(directory=self.cache_directory)
            self.queue = True
        elif shards:
            self.cache = FanoutCache(self.cache_directory,
                                     shards=shards,
                                     timeout=timeout)
            self.queue = False
        else:
            self.cache = Cache(self.cache_directory, timeout=timeout)
            self.queue = False

        return self.cache
Example #28
def cache_increment(key, timeout_seconds=0.3, cache_dirname='lccserver-cache'):
    '''
    This sets up a counter for the key in the cache.

    Sets the key -> time of initial insertion
    Then increments 'key-counter'.

    '''
    cachedir = os.path.abspath(cache_dirname)
    cache = FanoutCache(cachedir, timeout=timeout_seconds)

    # add the key if not already present
    key_added = cache.add(key, time.time())

    # increment the counter whether or not the key was newly added
    key_count = cache.incr('%s-counter' % key)

    cache.close()
    return key_count
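
Examples #23, #25, and #28 together form a simple counter-based rate tracker; a hedged usage sketch, assuming both functions are imported and pointed at the same cache directory:

cache_dir = '/tmp/lccserver-cache'

for _ in range(5):
    cache_increment('api-requests', cache_dirname=cache_dir)

rate, count, first_seen = cache_getrate('api-requests', cache_dirname=cache_dir)
print('requests/minute: {:.2f} (count={}, first seen {})'.format(rate, count, first_seen))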
Example #29
#coding:utf-8
from timeit import Timer
from diskcache import FanoutCache
import sys
sys.setrecursionlimit(3000)

# Location of the temporary cache files
cache = FanoutCache('tmp/diskcache/fanoutcache')


@cache.memoize(typed=True, expire=None, tag='fib_disk')
def fib_disk(n):
    if n <= 2:
        return 1
    else:
        return fib_disk(n - 1) + fib_disk(n - 2)


if __name__ == "__main__":
    t1 = Timer("fib_disk(100)", "from __main__ import fib_disk")
    print("fib_disk(100)", t1.timeit(number=1000), "seconds")

# Measured results:
#fib_disk(100) 0.12914266199999996 seconds
#fib_disk(100) 0.129672597 seconds
Example #30
def getCache(scope_str):
    return FanoutCache('data/cache/' + scope_str,
                       disk=GzipDisk,
                       shards=32,
                       timeout=1,
                       size_limit=2e11)
Example #31
# -*- coding: utf-8 -*-
"""
Helper functions.
"""
import re
import os
import errno
import pandas
from diskcache import FanoutCache

CACHE = FanoutCache('../cache')


def mkdirs(path):
    """Make directories if they do not exist."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def write_to_csv(df, *path_parts):
    """Save a dataframe to CSV."""
    here = os.path.abspath(os.path.dirname(__file__))
    _dir = os.path.join(os.path.dirname(here), *path_parts[:-1])
    fn = path_parts[-1]
    mkdirs(_dir)
    out_path = os.path.join(_dir, fn)
    df.to_csv(out_path, encoding='utf-8', index=False)
    print('CSV file saved to {}'.format(out_path))