def persistent_memoize(target):
    """Memoize target function, keep persistent cache in state
    """
    target_hash = hash(getsource(target))
    cache = PersistentCache(target_hash)
    return memoize(target, cache=cache)
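
# ``toolz.memoize`` treats whatever is passed as ``cache=`` as a mapping: it
# only does membership tests, item lookup and item assignment.  So the
# ``PersistentCache`` above just needs a dict-like interface.  Below is a
# minimal sketch of that idea with a hypothetical shelve-backed cache; the
# class name, storage choice and file path are illustrative, not the
# original implementation.
import shelve

from toolz import memoize


class DiskCache(object):
    """Hypothetical dict-like cache persisted to a shelve file."""

    def __init__(self, path):
        self.db = shelve.open(path)

    def __contains__(self, key):
        return repr(key) in self.db      # shelve keys must be strings

    def __getitem__(self, key):
        return self.db[repr(key)]

    def __setitem__(self, key, value):
        self.db[repr(key)] = value


def expensive(n):
    return sum(i * i for i in range(n))

# results now survive for as long as the shelve file does
expensive = memoize(expensive, cache=DiskCache('/tmp/expensive-cache'))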
def __init__(self): """Initialize the Substitutor class. Notes ----- We need a new cache per substitution call, otherwise we leak state across calls and end up incorrectly reusing other substitions' cache. """ cache = toolz.memoize(key=lambda args, kwargs: args[0]._key) self.substitute = cache(self._substitute)

def test_memoized_get():
    try:
        import toolz
    except ImportError:
        return

    cache = dict()
    getm = toolz.memoize(get, cache=cache, key=lambda args, kwargs: args[1:])

    result = getm(d, ':z', get=getm)

    assert result == 3
    assert contains(cache, {(':x',): 1, (':y',): 2, (':z',): 3})

def __init__(self, path, **kwargs):
    self.path = path
    self.kwargs = kwargs

def __iter__(self):
    return (resource(os.path.join(self.path, fn), **self.kwargs)
            for fn in sorted(os.listdir(self.path)))


def Directory(cls):
    """ Parametrized DirectoryClass """
    return type('Directory(%s)' % cls.__name__, (_Directory,),
                {'container': cls})

Directory.__doc__ = _Directory.__doc__
Directory = memoize(Directory)

re_path_sep = os.path.sep
if re_path_sep == '\\':
    re_path_sep = '\\\\'


@discover.register(_Directory)
def discover_Directory(c, **kwargs):
    return var * discover(first(c)).subshape[0]


@resource.register('.+' + re_path_sep + '\*\..+', priority=15)
def resource_directory(uri, **kwargs):
    path = uri.rsplit(os.path.sep, 1)[0]
    try:

def _get_s3_bucket(bucket_name, aws_access_key, aws_secret_key, connection,
                   anon):
    """Connect to s3 and return a bucket"""
    import boto
    if anon is True:
        connection = boto.connect_s3(anon=anon)
    elif connection is None:
        connection = boto.connect_s3(aws_access_key, aws_secret_key)
    return connection.get_bucket(bucket_name)

# We need an unmemoized function to call in the main thread, and memoized
# functions for the dask.
_memoized_get_bucket = toolz.memoize(_get_s3_bucket)


def _get_key(bucket_name, conn_args, key_name):
    bucket = _memoized_get_bucket(bucket_name, *conn_args)
    key = bucket.get_key(key_name)
    ext = key_name.split('.')[-1]
    return stream_decompress(ext, key.read())


def _parse_s3_URI(bucket_name, paths):
    from ..compatibility import quote, unquote
    assert bucket_name.startswith('s3://')
    o = urlparse('s3://' + quote(bucket_name[len('s3://'):]))
    # if a path is specified
    if (paths == '*') and (o.path != '' and o.path != '/'):

    return var * Record(records)


@memoize
def metadata_of_engine(engine, schema=None):
    return sa.MetaData(engine, schema=schema)


def create_engine(uri, *args, **kwargs):
    if ':memory:' in uri:
        return sa.create_engine(uri, *args, **kwargs)
    else:
        return memoized_create_engine(uri, *args, **kwargs)


memoized_create_engine = memoize(sa.create_engine)


@dispatch(sa.engine.base.Engine, str)
def discover(engine, tablename):
    metadata = metadata_of_engine(engine)
    if tablename not in metadata.tables:
        try:
            metadata.reflect(engine,
                             views=metadata.bind.dialect.supports_views)
        except NotImplementedError:
            metadata.reflect(engine)
    table = metadata.tables[tablename]
    return discover(table)
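
# Because ``memoize`` caches on the URI, repeated calls with the same
# non-``:memory:`` URI hand back the very same ``Engine`` (and connection
# pool) instead of creating a new one.  A quick illustration of that
# behaviour, assuming SQLAlchemy is installed (the file path is arbitrary,
# and ``memoized_create_engine`` is re-created here so the example is
# self-contained):
import sqlalchemy as sa
from toolz import memoize

memoized_create_engine = memoize(sa.create_engine)

e1 = memoized_create_engine('sqlite:////tmp/example.db')
e2 = memoized_create_engine('sqlite:////tmp/example.db')
assert e1 is e2              # same URI -> the cached Engine is reused

e3 = sa.create_engine('sqlite:///:memory:')
e4 = sa.create_engine('sqlite:///:memory:')
assert e3 is not e4          # separate in-memory databases stay distinct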

    get_numba_type
    compute_signature
    """
    if isinstance(expr, Broadcast):
        leaves = expr._scalars
        expr = expr._scalar_expr
    else:
        leaves = expr._leaves()

    s, scope = funcstr(leaves, expr)

    scope = dict((k, numba.jit(nopython=True)(v) if callable(v) else v)
                 for k, v in scope.items())

    # get the func
    func = eval(s, scope)
    # get the signature
    sig = compute_signature(expr)

    # vectorize is currently not thread safe, so take the lock while compiling.
    # TODO FIXME: remove this when numba has made vectorize thread safe.
    with lock:
        ufunc = numba.vectorize([sig], nopython=True)(func)
    return ufunc


# do this here so we can run our doctest
get_numba_ufunc = memoize(_get_numba_ufunc)


def broadcast_numba(t, *data, **kwargs):
    return get_numba_ufunc(t)(*data)

    >>> get_object_name_from_id(3, db)
    'NOT_FOUND'
    """
    found_names = [
        name for name, ids_rgb, ids_pcloud in db.items()
        if any([object_id in ids_rgb, object_id in ids_pcloud])
    ]
    return found_names[0] if found_names else 'NOT_FOUND'


# Note:
# This is the "memoized" [1] version of a private function with the same name
# and is the one that should be used.
# [1] http://toolz.readthedocs.org/en/latest/api.html#toolz.functoolz.memoize
get_object_name_from_id = memoize(_get_object_name_from_id)


class ObjectDBHelper(object):
    """Loads and stores the ObjectDatabase to a YAML file."""

    def __init__(self, db_filename):
        """Constructor.

        Args:
            db_filename (str): Filename (and path) of the DB YAML file.
        """
        super(ObjectDBHelper, self).__init__()
        self.db_filename = db_filename
        self.load(self.db_filename)

    import pyspark
    from pyspark import RDD
    from pyspark.rdd import PipelinedRDD
    try:
        from pyspark.sql import DataFrame as SparkDataFrame
    except ImportError:
        SparkDataFrame = Dummy
    from pyspark.sql import SchemaRDD
    from pyspark.sql import SQLContext, HiveContext
    RDD.min
except (AttributeError, ImportError):
    SparkDataFrame = PipelinedRDD = RDD = SparkContext = SQLContext = Dummy
    HiveContext = SchemaRDD = Dummy
    pyspark = Dummy()
else:
    HiveContext = memoize(HiveContext)


@append.register(SparkContext, list)
def list_to_spark_context(sc, seq, **kwargs):
    return sc.parallelize(seq)


@append.register(SparkContext, object)
def anything_to_spark_context(sc, o, **kwargs):
    return append(sc, convert(list, o, **kwargs), **kwargs)


@convert.register(list, (RDD, PipelinedRDD))
def rdd_to_list(rdd, **kwargs):
    return rdd.collect()

        a, b = b, a + b
    return a

# This is intuitive but VERY slow
def fib(n):
    """ Functional definition of Fibonacci numbers """
    if n == 0 or n == 1:
        return n
    else:
        return fib(n - 1) + fib(n - 2)

from toolz import memoize

# Oh wait, it's fast again
fib = memoize(fib)

# Provide a cache with initial values to `memoize`
@memoize(cache={0: 0, 1: 1})
def fib(n):
    """ Functional definition of Fibonacci numbers with initial terms cached.

    fib(0) == 0
    fib(1) == 1
    ...
    fib(n) == fib(n - 1) + fib(n - 2)
    """
    return fib(n - 1) + fib(n - 2)
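
# A quick sanity check of the seeded version above: every fib(k) is computed
# once and then served from the cache, so the recursion runs in linear time
# instead of recomputing subproblems exponentially.  (The values are just the
# standard Fibonacci numbers.)
assert fib(10) == 55
assert fib(30) == 832040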

def __init__(self, url, chunk_size=1024, decode_unicode=False,
             *args, **kwargs):
    self.subtype.__init__(self, url, *args, **kwargs)
    self.url = url
    self.chunk_size = chunk_size
    self.decode_unicode = decode_unicode
    self.filename = os.path.basename(urlparse(url).path)


def URL(cls):
    return type('URL(%s)' % cls.__name__, (_URL, cls), {'subtype': cls})

URL.__doc__ = _URL.__doc__
URL = memoize(URL)


@sample.register((URL(CSV), URL(JSONLines)))
@contextmanager
def sample_url_line_delimited(data, lines=5, encoding='utf-8'):
    """Get a sample of `lines` lines from a URL CSV or URL line-delimited
    JSON.

    Parameters
    ----------
    data : URL(CSV)
        A hosted CSV
    lines : int, optional, default ``5``
        Number of lines to read into memory
    """

import time
import random

import pygame

from pyparadigm.surface_composition import *
from pyparadigm.misc import *

from toolz import partial, memoize, pipe
from toolz import compose as t_compose
from toolz.curried import map

import config as c

lmap = t_compose(list, map)

text_to_surface = memoize(lambda text: Text(
    text, Font(c.Text.font, size=c.Text.font_size), color=c.Text.text_color))

surface_to_screen = memoize(lambda s_text: compose(_bg())(
    Surface(Margin(bottom=2))(s_text)))

_bg = partial(empty_surface, c.Screen.background)
_font = lambda: Font(c.Text.font, size=c.Text.font_size)


def shuffled(iterable):
    l = list(iterable)
    random.shuffle(l)
    return l


def multi_page_text(event_listener, text_array):

        except boto.exception.S3ResponseError:
            bucket = self.s3.create_bucket(
                self.bucket, **filter_kwargs(self.s3.create_bucket, kwargs))

        self.object = bucket.get_key(
            self.key, **filter_kwargs(bucket.get_key, kwargs))
        if self.object is None:
            self.object = bucket.new_key(self.key)

        self.subtype.__init__(
            self, uri, *args, **filter_kwargs(self.subtype.__init__, kwargs))


def S3(cls):
    return type("S3(%s)" % cls.__name__, (_S3, cls), {"subtype": cls})

S3.__doc__ = _S3.__doc__
S3 = memoize(S3)


@sample.register((S3(CSV), S3(JSONLines)))
@contextmanager
def sample_s3_line_delimited(data, length=8192):
    """Get a size `length` sample from an S3 CSV or S3 line-delimited JSON.

    Parameters
    ----------
    data : S3(CSV)
        A CSV file living in an S3 bucket
    length : int, optional, default ``8192``
        Number of bytes of the file to read
    """
    headers = {"Range": "bytes=0-%d" % length}

    self.auth = keyfilter(
        keywords(paramiko.SSHClient.connect).__contains__, kwargs)
    self.subtype.__init__(self, *args, **kwargs)

def lines(self):
    conn = sftp(**self.auth)
    return conn.file(self.path, 'r')


def SSH(cls):
    return type('SSH(%s)' % cls.__name__, (_SSH, cls), {'subtype': cls})

SSH.__doc__ = _SSH.__doc__
SSH = memoize(SSH)


types_by_extension = {'csv': CSV, 'json': JSONLines}

ssh_pattern = '((?P<username>[a-zA-Z]\w*)@)?(?P<hostname>[\w.-]*)(:(?P<port>\d+))?:(?P<path>[/\w.*-]+)'


@resource.register('ssh://.+', priority=16)
def resource_ssh(uri, **kwargs):
    if 'ssh://' in uri:
        uri = uri[len('ssh://'):]
    d = re.match(ssh_pattern, uri).groupdict()
    d = dict((k, v) for k, v in d.items() if v is not None)
    path = d.pop('path')

from __future__ import absolute_import, division, print_function

from toolz import memoize

from .drop import drop


class _Temp(object):
    """ Temporary version of persistent storage

    Calls ``drop`` on object at garbage collection

    This is a parametrized type, so call it on types to make new types

    >>> from odo import Temp, CSV
    >>> csv = Temp(CSV)('/tmp/myfile.csv', delimiter=',')
    """
    def __del__(self):
        drop(self)


def Temp(cls):
    """ Parametrized Temp class """
    return type('Temp(%s)' % cls.__name__, (_Temp, cls),
                {'persistent_type': cls})

Temp.__doc__ = _Temp.__doc__
Temp = memoize(Temp)

        return DataShape(*(self._child.shape + (self.schema, )))

    def __str__(self):
        return '%s.coerce(to=%r)' % (self._child, str(self.schema))


@copydoc(Coerce)
def coerce(expr, to):
    return Coerce(expr, dshape(to) if isinstance(to, _strtypes) else to)


dshape_method_list = list()
schema_method_list = list()
method_properties = set()

dshape_methods = memoize(partial(select_functions, dshape_method_list))
schema_methods = memoize(partial(select_functions, schema_method_list))


@dispatch(DataShape)
def shape(ds):
    s = ds.shape
    s = tuple(int(d) if isinstance(d, Fixed) else d for d in s)
    return s


@dispatch(object)
def shape(expr):
    """ Shape of expression

    >>> symbol('s', '3 * 5 * int32').shape

    for i in range(n):
        a, b = b, a + b
    return a

# This is intuitive but VERY slow
def fib(n):
    """ Functional definition of Fibonacci numbers """
    if n == 0 or n == 1:
        return n
    else:
        return fib(n - 1) + fib(n - 2)

from toolz import memoize

# Oh wait, it's fast again
fib = memoize(fib)

# Provide a cache with initial values to `memoize`
@memoize(cache={0: 0, 1: 1})
def fib(n):
    """ Functional definition of Fibonacci numbers with initial terms cached.

    fib(0) == 0
    fib(1) == 1
    ...
    fib(n) == fib(n - 1) + fib(n - 2)
    """
    return fib(n - 1) + fib(n - 2)

def __init__(self):
    cache = toolz.memoize(key=lambda args, kwargs: _expr_key(args[0]))
    self.substitute = cache(self._substitute)

def __init__(self, *args, **kwargs):
    self.auth = keyfilter(
        keywords(paramiko.SSHClient.connect).__contains__, kwargs)
    self.subtype.__init__(self, *args, **kwargs)

def lines(self):
    conn = sftp(**self.auth)
    return conn.file(self.path, 'r')


def SSH(cls):
    return type('SSH(%s)' % cls.__name__, (_SSH, cls), {'subtype': cls})

SSH.__doc__ = _SSH.__doc__
SSH = memoize(SSH)


types_by_extension = {'csv': CSV, 'json': JSONLines}

ssh_pattern = '((?P<username>[a-zA-Z]\w*)@)?(?P<hostname>[\w.-]*)(:(?P<port>\d+))?:(?P<path>[/\w.*-]+)'


@resource.register('ssh://.+', priority=16)
def resource_ssh(uri, **kwargs):
    if 'ssh://' in uri:
        uri = uri[len('ssh://'):]
    d = re.match(ssh_pattern, uri).groupdict()
    d = dict((k, v) for k, v in d.items() if v is not None)
    path = d.pop('path')

def _get_s3_bucket(bucket_name, aws_access_key, aws_secret_key, connection,
                   anon):
    """Connect to s3 and return a bucket"""
    import boto
    if anon is True:
        connection = boto.connect_s3(anon=anon)
    elif connection is None:
        connection = boto.connect_s3(aws_access_key, aws_secret_key)
    return connection.get_bucket(bucket_name)

# We need an unmemoized function to call in the main thread, and memoized
# functions for the dask.
_memoized_get_bucket = toolz.memoize(_get_s3_bucket)


def _get_key(bucket_name, conn_args, key_name):
    bucket = _memoized_get_bucket(bucket_name, *conn_args)
    key = bucket.get_key(key_name)
    ext = key_name.split('.')[-1]
    return stream_decompress(ext, key.read())


def _parse_s3_URI(bucket_name, paths):
    assert bucket_name.startswith('s3://')
    o = urlparse('s3://' + quote(bucket_name[len('s3://'):]))
    # if a path is specified
    if (paths == '*') and (o.path != '' and o.path != '/'):
        paths = unquote(o.path[1:])

from __future__ import absolute_import, division, print_function

from toolz import memoize

from .drop import drop


class _Temp(object):
    """ Temporary version of persistent storage

    Calls ``drop`` on object at garbage collection

    This is a parametrized type, so call it on types to make new types

    >>> from into import Temp, CSV
    >>> csv = Temp(CSV)('/tmp/myfile.csv', delimiter=',')
    """
    def __del__(self):
        drop(self)


def Temp(cls):
    """ Parametrized Temp class """
    return type('Temp(%s)' % cls.__name__, (_Temp, cls),
                {'persistent_type': cls})

Temp.__doc__ = _Temp.__doc__
Temp = memoize(Temp)

        connect_args=frozenset((connect_args or {}).items()),
        **kwargs)


def _create_engine_hashable_args(uri, connect_args=None, **kwargs):
    """Unpacks non-hashable args for ``sa.create_engine`` and puts that back
    into whatever structure is expected.
    """
    return sa.create_engine(uri, connect_args=dict(connect_args or {}),
                            **kwargs)


_memoized_create_engine_hashable_args = memoize(_create_engine_hashable_args)


@dispatch(sa.engine.base.Engine, str)
def discover(engine, tablename):
    metadata = sa.MetaData(engine)
    if tablename not in metadata.tables:
        try:
            metadata.reflect(engine,
                             views=metadata.bind.dialect.supports_views)
        except NotImplementedError:
            metadata.reflect(engine)
    table = metadata.tables[tablename]
    return discover(table)
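
# The ``frozenset`` round-trip exists because ``memoize`` keys its cache on
# the call arguments, and a plain dict of ``connect_args`` is unhashable.
# A minimal illustration of the failure and the workaround, independent of
# SQLAlchemy (the function names here are made up for the example):
from toolz import memoize


@memoize
def configure(name, options=None):
    return (name, dict(options or {}))

# configure('db', options={'timeout': 30})
# -> raises TypeError, because the dict value cannot be hashed into a cache key


@memoize
def configure_hashable(name, options=frozenset()):
    return (name, dict(options))

configure_hashable('db', options=frozenset({'timeout': 30}.items()))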

        _memoized_create_engine_hashable_args
    )(uri, connect_args=frozenset((connect_args or {}).items()), **kwargs)


def _create_engine_hashable_args(uri, connect_args=None, **kwargs):
    """Unpacks non-hashable args for ``sa.create_engine`` and puts that back
    into whatever structure is expected.
    """
    return sa.create_engine(
        uri, connect_args=dict(connect_args or {}), **kwargs
    )


_memoized_create_engine_hashable_args = memoize(_create_engine_hashable_args)


@dispatch(sa.engine.base.Engine, str)
def discover(engine, tablename):
    metadata = sa.MetaData(engine)
    if tablename not in metadata.tables:
        try:
            metadata.reflect(engine,
                             views=metadata.bind.dialect.supports_views)
        except NotImplementedError:
            metadata.reflect(engine)
    table = metadata.tables[tablename]
    return discover(table)

    def __str__(self):
        return '%s.coerce(to=%r)' % (self._child, str(self.schema))


def apply(expr, func, dshape, splittable=False):
    return Apply(expr, func, datashape.dshape(dshape), splittable)

apply.__doc__ = Apply.__doc__


dshape_method_list = list()
schema_method_list = list()
method_properties = set()

dshape_methods = memoize(partial(select_functions, dshape_method_list))
schema_methods = memoize(partial(select_functions, schema_method_list))


@dispatch(DataShape)
def shape(ds):
    s = ds.shape
    s = tuple(int(d) if isinstance(d, Fixed) else d for d in s)
    return s


@dispatch(object)
def shape(expr):
    """ Shape of expression

    >>> symbol('s', '3 * 5 * int32').shape

        self.object = bucket.get_key(
            self.key, **filter_kwargs(bucket.get_key, kwargs))
        if self.object is None:
            self.object = bucket.new_key(self.key)

        self.subtype.__init__(
            self, uri, *args, **filter_kwargs(self.subtype.__init__, kwargs))


def S3(cls):
    return type('S3(%s)' % cls.__name__, (_S3, cls), {'subtype': cls})

S3.__doc__ = _S3.__doc__
S3 = memoize(S3)


@sample.register((S3(CSV), S3(JSONLines)))
@contextmanager
def sample_s3_line_delimited(data, length=8192):
    """Get a size `length` sample from an S3 CSV or S3 line-delimited JSON.

    Parameters
    ----------
    data : S3(CSV)
        A CSV file living in an S3 bucket
    length : int, optional, default ``8192``
        Number of bytes of the file to read
    """
    headers = {'Range': 'bytes=0-%d' % length}

    >>> c = chunks(list)([[1, 2, 3], [4, 5, 6]])
    >>> next(iter(c))
    [1, 2, 3]
    >>> c.container.__name__
    'list'
    """
    def __init__(self, data):
        self.data = data

    def __iter__(self):
        if callable(self.data):
            return self.data()
        else:
            return iter(self.data)


def chunks(cls):
    """ Parametrized Chunks Class """
    return type('chunks(%s)' % cls_name(cls), (Chunks,), {'container': cls})

chunks.__doc__ = Chunks.__doc__
chunks = memoize(chunks)


@discover.register(Chunks)
def discover_chunks(c, **kwargs):
    return var * discover(first(c)).subshape[0]

'''
Exploring the spectral decomposition for matrices
'''
from itertools import imap

import numpy as np
import toolz

import utils

# Implement caching
eigh = toolz.memoize(np.linalg.eigh)


class sym_matrix(np.matrix):
    '''
    Class for real symmetric matrices
    '''
    # Rewrite this to __init__
    def __new__(cls, input_array):
        m = np.asarray(input_array)
        if (m != m.T).any():
            raise ValueError('Matrix must be symmetric')
        return m.view(cls)

    @staticmethod
    def rand(n=3, maxint=10):
        '''
        Generate a random symmetric n x n matrix
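
# One caveat with memoizing ``np.linalg.eigh`` directly: ``toolz.memoize``
# keys its cache on the call arguments, which must be hashable, and NumPy
# arrays are not.  A hedged sketch of a workaround using the ``key=`` hook;
# the key function below is an assumption for illustration, not part of the
# original code.
import numpy as np
import toolz

# key on the array's contents (plus shape and dtype) instead of the
# unhashable array object itself
eigh = toolz.memoize(
    np.linalg.eigh,
    key=lambda args, kwargs: (args[0].tobytes(), args[0].shape,
                              str(args[0].dtype)),
)

m = np.array([[2.0, 1.0], [1.0, 2.0]])
w1, v1 = eigh(m)
w2, v2 = eigh(m.copy())      # cache hit: identical bytes, shape and dtype
assert w1 is w2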