def serialize_partial(func_name, output_key_base='load_function'):
    """Serialize a function call to a dictionary.

    Parameters
    ----------
    func_name: partial function.
        Function (or partial) to serialize. If None, an empty dict is
        returned and a warning is logged.
    output_key_base: str. Default 'load_function'
        String to prepend to the serialization keys.

    Returns
    -------
    dict containing:
    {output_key_base}_name: function name
    {output_key_base}_module: fully-qualified module name containing function
    {output_key_base}_args: args to pass to function
    {output_key_base}_kwargs: kwargs to pass to function
    """
    entry = {}
    if func_name is None:
        logger.warning(
            f"serialize_partial: `{output_key_base}` is None. Ignoring.")
        return entry
    # Wrapping in partial() normalizes plain functions and partials alike,
    # giving uniform access to .func, .args and .keywords.
    func = partial(func_name)
    # Look up (module path list, name) once instead of calling
    # jfi.get_func_name twice as the original did.
    module_path, name = jfi.get_func_name(func.func)
    entry[f'{output_key_base}_module'] = ".".join(module_path)
    entry[f'{output_key_base}_name'] = name
    entry[f'{output_key_base}_args'] = func.args
    entry[f'{output_key_base}_kwargs'] = func.keywords
    return entry
def write_datasets(ds, path=None, filename="datasets.json", indent=4, sort_keys=True):
    """Write a serialized (JSON) dataset file

    Converts the callable `load_function` into something that can be
    serialized to json.

    Parameters
    ----------
    ds: dict
        Mapping of dataset name -> entry dict. Each entry may carry an
        'action' key ('fetch_and_process' or 'generate') and an optional
        'load_function' partial.
    path: path-like or None
        Output directory. Defaults to _MODULE_DIR.
    filename: str
        Name of the JSON file to write.
    indent, sort_keys:
        Passed through to json.dump.

    Raises
    ------
    Exception
        If action == 'generate' without a load_function, or if the action
        is unknown.
    """
    if path is None:
        path = _MODULE_DIR
    else:
        path = pathlib.Path(path)

    # copy, adjusting non-serializable items
    for key, entry in ds.items():
        action = entry.get('action', 'fetch_and_process')
        entry['action'] = action
        func = entry.get('load_function', None)
        if func is None:
            if action == 'fetch_and_process':
                func = partial(new_dataset, dataset_name=key)
            elif action == 'generate':
                raise Exception('must specify generation function')
            else:
                raise Exception(f'Unknown action: {action}')
        else:
            # del is a statement, not a function: no parentheses needed.
            del entry['load_function']
        # Resolve (module path, name) once instead of calling
        # jfi.get_func_name twice as the original did.
        module_path, func_name = jfi.get_func_name(func.func)
        entry['load_function_module'] = ".".join(module_path)
        entry['load_function_name'] = func_name
        entry['load_function_args'] = func.args
        entry['load_function_kwargs'] = func.keywords

    with open(path / filename, 'w') as fw:
        json.dump(ds, fw, indent=indent, sort_keys=sort_keys)
def serialize_partial(func, key_base='load_function'):
    """Serialize a function call to a dictionary.

    Parameters
    ----------
    func: function
        function to serialize. If None, an empty dict is returned and a
        warning is logged.
    key_base: str. Default 'load_function'
        string to prepend to serialization parameters.

    Returns
    -------
    dict containing:
    {key_base}_name: function name
    {key_base}_module: fully-qualified module name containing function
    {key_base}_args: args to pass to function
    {key_base}_kwargs: kwargs to pass to function
    """
    entry = {}
    if func is None:
        logger.warning(f"serialize_partial: `{key_base}` is None. Ignoring.")
        return entry
    # partial() normalizes plain functions and partials alike.
    func = partial(func)
    # Single lookup instead of two jfi.get_func_name calls.
    module_path, name = jfi.get_func_name(func.func)
    entry[f'{key_base}_module'] = ".".join(module_path)
    entry[f'{key_base}_name'] = name
    entry[f'{key_base}_args'] = func.args
    entry[f'{key_base}_kwargs'] = func.keywords
    return entry
def test_func_inspect_errors():
    """Check that func_inspect is robust and will work on weird objects."""
    # nose is unmaintained and nose.tools.assert_equal is deprecated;
    # plain asserts behave identically under pytest.
    assert get_func_name("a".lower)[-1] == "lower"
    assert get_func_code("a".lower)[1:] == (None, -1)
    ff = lambda x: x
    assert get_func_name(ff, win_characters=False)[-1] == "<lambda>"
    assert get_func_code(ff)[1] == __file__.replace(".pyc", ".py")
    # Simulate a function defined in __main__
    ff.__module__ = "__main__"
    assert get_func_name(ff, win_characters=False)[-1] == "<lambda>"
    assert get_func_code(ff)[1] == __file__.replace(".pyc", ".py")
def test_func_inspect_errors():
    """func_inspect must cope with builtins, lambdas, and __main__ functions."""
    # Builtin bound method: name is recoverable, source is not.
    assert get_func_name("a".lower)[-1] == "lower"
    assert get_func_code("a".lower)[1:] == (None, -1)
    identity = lambda x: x
    source_path = __file__.replace(".pyc", ".py")
    assert get_func_name(identity, win_characters=False)[-1] == "<lambda>"
    assert get_func_code(identity)[1] == source_path
    # Simulate a function defined in __main__
    identity.__module__ = "__main__"
    assert get_func_name(identity, win_characters=False)[-1] == "<lambda>"
    assert get_func_code(identity)[1] == source_path
def test_func_inspect_errors():
    """Check func_inspect stays robust on weird objects (builtins, lambdas)."""
    assert get_func_name('a'.lower)[-1] == 'lower'
    assert get_func_code('a'.lower)[1:] == (None, -1)
    fn = lambda x: x
    expected_file = __file__.replace('.pyc', '.py')
    # First pass: lambda in its defining module; second pass simulates a
    # function defined in __main__.
    for simulate_main in (False, True):
        if simulate_main:
            fn.__module__ = '__main__'
        assert get_func_name(fn, win_characters=False)[-1] == '<lambda>'
        assert get_func_code(fn)[1] == expected_file
def test_func_inspect_errors():
    """Check that func_inspect is robust and will work on weird objects."""
    # nose is unmaintained; replace nose.tools.assert_equal with plain
    # asserts, which pytest rewrites into informative failures.
    assert get_func_name('a'.lower)[-1] == 'lower'
    assert get_func_code('a'.lower)[1:] == (None, -1)
    ff = lambda x: x
    assert get_func_name(ff, win_characters=False)[-1] == '<lambda>'
    assert get_func_code(ff)[1] == __file__.replace('.pyc', '.py')
    # Simulate a function defined in __main__
    ff.__module__ = '__main__'
    assert get_func_name(ff, win_characters=False)[-1] == '<lambda>'
    assert get_func_code(ff)[1] == __file__.replace('.pyc', '.py')
def test_func_inspect_errors():
    """Check that func_inspect is robust and will work on weird objects."""
    # Modernized: nose.tools.assert_equal -> plain assert (nose is
    # unmaintained and incompatible with current Python versions).
    assert get_func_name('a'.lower)[-1] == 'lower'
    assert get_func_code('a'.lower)[1:] == (None, -1)
    ff = lambda x: x
    assert get_func_name(ff, win_characters=False)[-1] == '<lambda>'
    assert get_func_code(ff)[1] == __file__.replace('.pyc', '.py')
    # Simulate a function defined in __main__
    ff.__module__ = '__main__'
    assert get_func_name(ff, win_characters=False)[-1] == '<lambda>'
    assert get_func_code(ff)[1] == __file__.replace('.pyc', '.py')
def serialize_partial(func):
    """Serialize a function call to a dictionary.

    Parameters
    ----------
    func: partial function.
        Function (or partial) to serialize.

    Returns
    -------
    dict containing:
    load_function_name: function name
    load_function_module: fully-qualified module name containing function
    load_function_args: args to pass to function
    load_function_kwargs: kwargs to pass to function
    """
    # partial() normalizes plain functions and partials alike, giving
    # uniform access to .func, .args and .keywords.
    func = partial(func)
    # Single lookup instead of two jfi.get_func_name calls.
    module_path, name = jfi.get_func_name(func.func)
    entry = {}
    entry['load_function_module'] = ".".join(module_path)
    entry['load_function_name'] = name
    entry['load_function_args'] = func.args
    entry['load_function_kwargs'] = func.keywords
    return entry
def get_job(self, func, args_dict):
    """Build a ClusterJob for calling *func* with *args_dict*.

    Requires *func* to carry the metadata attached by the @versioned
    decorator; raises ValueError otherwise.
    """
    if not hasattr(func, "version_info"):
        raise ValueError("func does not have @versioned decorator")
    digest = func.version_info["digest"]
    func_hash = encode_digest(digest)
    # Path is the fully-qualified module parts plus the function name.
    module_parts, func_name = get_func_name(func)
    func_path = pjoin(*(module_parts + [func_name]))
    hasher = NumpyHasher("sha1")
    # Job identity covers both the function version and its arguments.
    hasher.hash((digest, args_dict))
    job_hash = encode_digest(hasher._hash.digest())
    return ClusterJob(self, func, func_path, func_hash, job_hash)
def get_job(self, func, args_dict):
    """Return a ClusterJob identifying *func* applied to *args_dict*.

    Raises ValueError when *func* was not wrapped by @versioned.
    """
    if not hasattr(func, 'version_info'):
        raise ValueError('func does not have @versioned decorator')
    version_digest = func.version_info['digest']
    func_hash = encode_digest(version_digest)
    parts, name = get_func_name(func)
    parts.append(name)
    func_path = pjoin(*parts)
    # Hash (function version, arguments) together to key this job.
    h = NumpyHasher('sha1')
    h.hash((version_digest, args_dict))
    job_hash = encode_digest(h._hash.digest())
    return ClusterJob(self, func, func_path, func_hash, job_hash)
def dec(func):
    """Attach version_info metadata to *func* and return it.

    Builds a SHA-1 digest over the function's qualified name and its
    version (explicit `version`, or a hash of the source when None).
    """
    # Make hash. The function hash does not consider dependencies.
    _version = version
    h = hashlib.sha1()
    module, name = get_func_name(func)
    # hashlib requires bytes in Python 3; the original passed str,
    # which raises TypeError.
    h.update(".".join(module + [name]).encode("UTF-8"))
    h.update(b"$")
    if version is None:
        # No manual version; use the hash of the contents as version
        src, source_file, lineno = get_func_code(func)
        # sha1 also needs bytes; get_func_code returns the source as str
        # here -- TODO confirm against jfi/joblib's get_func_code.
        src_digest = hashlib.sha1(src.encode("UTF-8")).digest()
        # b32encode returns bytes; decode so _version is a str like the
        # manual-version branch below.
        _version = base64.b32encode(src_digest).lower().decode("ascii")
    else:
        _version = str(version)
    h.update(_version.encode("UTF-8"))
    # Store information
    func.version_info = dict(
        version=_version,
        ignore_deps=deps == False,
        ignore_args=tuple(ignore),
        digest=h.digest()
    )
    return func
def dec(func):
    """Attach version_info metadata to *func* and return it.

    The digest combines the function's qualified name with its version
    (explicit `version` or a hash of the source when version is None).
    """
    # Make hash. The function hash does not consider dependencies.
    _version = version
    h = hashlib.sha1()
    module, name = get_func_name(func)
    # Python 3 fix: hashlib.update() requires bytes, not str.
    h.update('.'.join(module + [name]).encode('UTF-8'))
    h.update(b'$')
    if version is None:
        # No manual version; use the hash of the contents as version
        src, source_file, lineno = get_func_code(func)
        # sha1 also needs bytes; source is assumed to be str -- TODO
        # confirm against get_func_code's return type.
        src_digest = hashlib.sha1(src.encode('UTF-8')).digest()
        # b32encode yields bytes; decode so both branches produce str.
        _version = base64.b32encode(src_digest).lower().decode('ascii')
    else:
        _version = str(version)
    h.update(_version.encode('UTF-8'))
    # Store information
    func.version_info = dict(version=_version,
                             ignore_deps=deps == False,
                             ignore_args=tuple(ignore),
                             digest=h.digest())
    return func
def test_func_name(func, funcname):
    """get_func_name must report the undecorated name of *func*."""
    # Check that we are not confused by decoration
    # here testcase 'cached_func' is the function itself
    reported_name = get_func_name(func)[1]
    assert reported_name == funcname
def test_func_name_on_inner_func(cached_func):
    """Decoration must not hide the inner function's real name."""
    # here testcase 'cached_func' is the 'cached_func_inner' function
    # returned by the 'cached_func' fixture
    reported_name = get_func_name(cached_func)[1]
    assert reported_name == 'cached_func_inner'
def _lru_cache_wrapper(user_function, directory, maxsize, typed, _CacheInfo):
    """Disk-backed adaptation of functools' LRU cache wrapper.

    Results are stored in a diskcache.Cache keyed by the function's
    module, name, and a hash of its source code, so editing the function
    invalidates previously cached entries. The recency bookkeeping uses
    the same circular doubly-linked-list scheme as CPython's
    functools.lru_cache.
    """
    # Constants shared by all lru cache instances:
    sentinel = object()          # unique object used to signal cache misses
    make_key = _make_key         # build a key from the function arguments
    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3   # names for the link fields

    # Derive the on-disk subdirectory from module, name, and source hash
    # so stale results do not survive edits to the function body.
    source_code = str(get_func_code(user_function)[0])
    code_hash = hashlib.sha1(source_code.encode()).hexdigest()
    module, name = get_func_name(user_function)
    module_str = '.'.join(module)
    subdir = f"{module_str}.{name}_{code_hash}"
    cache = diskcache.Cache(directory=directory + '/' + subdir)
    hits = misses = 0
    full = False
    cache_get = cache.get      # bound method to lookup a key or return None
    cache_len = cache.__len__  # get cache size without calling len()
    lock = RLock()             # because linkedlist updates aren't threadsafe
    root = []                  # root of the circular doubly linked list
    root[:] = [root, root, None, None]     # initialize by pointing to self

    if maxsize == 0:

        def wrapper(*args, **kwds):
            # No caching -- just a statistics update
            nonlocal misses
            misses += 1
            result = user_function(*args, **kwds)
            return result

    elif maxsize is None:

        def wrapper(*args, **kwds):
            # Simple caching without ordering or size limit
            nonlocal hits, misses
            key = make_key(args, kwds, typed)
            result = cache_get(key, sentinel)
            if result is not sentinel:
                hits += 1
                return result
            misses += 1
            result = user_function(*args, **kwds)
            cache[key] = result
            return result

    else:

        def wrapper(*args, **kwds):
            # Size limited caching that tracks accesses by recency
            nonlocal root, hits, misses, full
            key = make_key(args, kwds, typed)
            with lock:
                link = cache_get(key)
                if link is not None:
                    # Move the link to the front of the circular queue
                    link_prev, link_next, _key, result = link
                    link_prev[NEXT] = link_next
                    link_next[PREV] = link_prev
                    last = root[PREV]
                    last[NEXT] = root[PREV] = link
                    link[PREV] = last
                    link[NEXT] = root
                    hits += 1
                    return result
                misses += 1
            # Lock released while running the user function: other threads
            # may populate the same key in the meantime.
            result = user_function(*args, **kwds)
            with lock:
                if key in cache:
                    # Getting here means that this same key was added to the
                    # cache while the lock was released.  Since the link
                    # update is already done, we need only return the
                    # computed result and update the count of misses.
                    pass
                elif full:
                    # Use the old root to store the new key and result.
                    oldroot = root
                    oldroot[KEY] = key
                    oldroot[RESULT] = result
                    # Empty the oldest link and make it the new root.
                    # Keep a reference to the old key and old result to
                    # prevent their ref counts from going to zero during the
                    # update. That will prevent potentially arbitrary object
                    # clean-up code (i.e. __del__) from running while we're
                    # still adjusting the links.
                    root = oldroot[NEXT]
                    oldkey = root[KEY]
                    oldresult = root[RESULT]
                    root[KEY] = root[RESULT] = None
                    # Now update the cache dictionary.
                    del cache[oldkey]
                    # Save the potentially reentrant cache[key] assignment
                    # for last, after the root and links have been put in
                    # a consistent state.
                    cache[key] = oldroot
                else:
                    # Put result in a new link at the front of the queue.
                    last = root[PREV]
                    link = [last, root, key, result]
                    last[NEXT] = root[PREV] = cache[key] = link
                    # Use the cache_len bound method instead of the len()
                    # function which could potentially be wrapped in an
                    # lru_cache itself.
                    full = (cache_len() >= maxsize)
            return result

    def cache_info():
        """Report cache statistics"""
        with lock:
            return _CacheInfo(hits, misses, maxsize, cache_len())

    def cache_clear():
        """Clear the cache and cache statistics"""
        nonlocal hits, misses, full
        with lock:
            cache.clear()
            root[:] = [root, root, None, None]
            hits = misses = 0
            full = False

    wrapper.cache_get = cache_get
    wrapper.cache_info = cache_info
    wrapper.cache_clear = cache_clear
    return wrapper
def test_func_name():
    """get_func_name must see through decoration."""
    # Yield-based nose test generators are unsupported by modern runners
    # (pytest removed them in 5.4); use plain asserts instead.
    assert get_func_name(f)[1] == 'f'
    # Check that we are not confused by the decoration
    assert get_func_name(g)[1] == 'g'