def test_iterators(self): pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] shuffle(pairs) od = OrderedDict(pairs) self.assertEqual(list(od), [t[0] for t in pairs]) self.assertEqual(list(od.keys()), [t[0] for t in pairs]) self.assertEqual(list(od.values()), [t[1] for t in pairs]) self.assertEqual(list(od.items()), pairs) self.assertEqual(list(reversed(od)), [t[0] for t in reversed(pairs)]) self.assertEqual(list(reversed(od.keys())), [t[0] for t in reversed(pairs)]) self.assertEqual(list(reversed(od.values())), [t[1] for t in reversed(pairs)]) self.assertEqual(list(reversed(od.items())), list(reversed(pairs)))
def test_iterators(self): pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)] shuffle(pairs) od = OrderedDict(pairs) self.assertEqual(list(od), [t[0] for t in pairs]) self.assertEqual(list(od.keys()), [t[0] for t in pairs]) self.assertEqual(list(od.values()), [t[1] for t in pairs]) self.assertEqual(list(od.items()), pairs) self.assertEqual(list(reversed(od)), [t[0] for t in reversed(pairs)]) self.assertEqual(list(reversed(od.keys())), [t[0] for t in reversed(pairs)]) self.assertEqual(list(reversed(od.values())), [t[1] for t in reversed(pairs)]) self.assertEqual(list(reversed(od.items())), list(reversed(pairs)))
def _stats(self): _stats = OrderedDict() _stats['id_string'] = self._get_id_string() _stats['version'] = self.id _stats['row_count'] = len(self.schema.get('content', {}).get('survey', [])) # returns stats in the format [ key="value" ] return '\n\t'.join(map(lambda key: '%s="%s"' % (key, str(_stats[key])), _stats.keys()))
def _stats(self): _stats = OrderedDict() _stats['id_string'] = self._get_id_string() _stats['version'] = self.id _stats['row_count'] = len( self.schema.get('content', {}).get('survey', [])) # returns stats in the format [ key="value" ] return '\n\t'.join( map(lambda key: '%s="%s"' % (key, str(_stats[key])), _stats.keys()))
class MultiDimensionalMapping(Dimensioned):
    """
    A MultiDimensionalMapping is a Dimensioned mapping (like a
    dictionary or array) that uses fixed-length multidimensional
    keys. This behaves like a sparse N-dimensional array that does not
    require a dense sampling over the multidimensional space.

    If the underlying value for each (key,value) pair also supports
    indexing (such as a dictionary, array, or list), fully qualified
    (deep) indexing may be used from the top level, with the first N
    dimensions of the index selecting a particular Dimensioned object
    and the remaining dimensions indexing into that object.

    For instance, for a MultiDimensionalMapping with dimensions "Year"
    and "Month" and underlying values that are 2D floating-point
    arrays indexed by (r,c), a 2D array may be indexed with x[2000,3]
    and a single floating-point number may be indexed as
    x[2000,3,1,9].

    In practice, this class is typically only used as an abstract base
    class, because the NdMapping subclass extends it with a range of
    useful slicing methods for selecting subsets of the data. Even so,
    keeping the slicing support separate from the indexing and data
    storage methods helps make both classes easier to understand.
    """

    group = param.String(default='MultiDimensionalMapping')

    key_dimensions = param.List(default=[Dimension("Default")], constant=True)

    data_type = None          # Optional type checking of elements
    _deep_indexable = False
    _sorted = True

    def __init__(self, initial_items=None, **params):
        """
        Build the mapping from ``initial_items``, which may be None, a
        single ``(key, value)`` tuple, another NdMapping, or anything
        accepted by the OrderedDict constructor.

        When another NdMapping is supplied, its changed parameters that
        this class also declares are inherited unless overridden by
        explicit keyword arguments.
        """
        if isinstance(initial_items, NdMapping):
            map_type = type(initial_items)
            own_params = self.params()
            new_params = dict(initial_items.get_param_values(onlychanged=True))
            # A group that is just the source's class name is a default
            # and must not be carried over to a different class.
            if new_params.get('group') == map_type.__name__:
                new_params.pop('group')
            params = dict({name: value for name, value in new_params.items()
                           if name in own_params}, **params)
        super(MultiDimensionalMapping, self).__init__(OrderedDict(), **params)

        self._next_ind = 0
        self._check_key_type = True
        # Per-dimension lookups cached once so _add_item stays cheap.
        self._cached_index_types = [d.type for d in self.key_dimensions]
        self._cached_index_values = {d.name: d.values
                                     for d in self.key_dimensions}
        self._cached_categorical = any(d.values for d in self.key_dimensions)

        if isinstance(initial_items, tuple):
            self._add_item(initial_items[0], initial_items[1])
        elif initial_items is not None:
            self.update(OrderedDict(initial_items))

    def _item_check(self, dim_vals, data):
        """
        Applies optional checks to individual data elements before
        they are inserted ensuring that they are of a certain
        type. Subclasses may implement further element restrictions.

        Raises TypeError if ``data`` is not an instance of ``data_type``
        (when one is set) and KeyError if the key length does not match
        the number of dimensions.
        """
        if self.data_type is not None and not isinstance(data, self.data_type):
            if isinstance(self.data_type, tuple):
                data_type = tuple(dt.__name__ for dt in self.data_type)
            else:
                data_type = self.data_type.__name__
            raise TypeError('{slf} does not accept {data} type, data elements have '
                            'to be a {restr}.'.format(slf=type(self).__name__,
                                                      data=type(data).__name__,
                                                      restr=data_type))
        elif len(dim_vals) != self.ndims:
            raise KeyError('Key has to match number of dimensions.')

    def _add_item(self, dim_vals, data, sort=True):
        """
        Adds item to the data, applying dimension types and ensuring
        key conforms to Dimension type and values.

        If the key already holds an NdMapping or OrderedDict, that
        nested container is updated in place instead of being replaced.
        Set ``sort=False`` to defer re-sorting when adding in bulk.
        """
        if not isinstance(dim_vals, tuple):
            dim_vals = (dim_vals,)

        self._item_check(dim_vals, data)

        # Apply dimension types
        dim_types = zip(self._cached_index_types, dim_vals)
        dim_vals = tuple(v if t is None else t(v) for t, v in dim_types)

        # Check and validate for categorical dimensions
        if self._cached_categorical:
            valid_vals = zip(self._cached_index_names, dim_vals)
        else:
            valid_vals = []
        for dim, val in valid_vals:
            vals = self._cached_index_values[dim]
            if vals and val not in vals:
                raise KeyError('%s Dimension value %s not in'
                               ' specified Dimension values.' % (dim, repr(val)))

        # Updates nested data structures rather than simply overriding them.
        if ((dim_vals in self.data)
                and isinstance(self.data[dim_vals], (NdMapping, OrderedDict))):
            self.data[dim_vals].update(data)
        else:
            self.data[dim_vals] = data

        if sort:
            self._resort()

    def _apply_key_type(self, keys):
        """
        If a type is specified by the corresponding key dimension,
        this method applies the type to the supplied key.

        Slices are converted component-wise (None components are kept),
        lists element-wise, and Ellipsis is passed through untouched.
        """
        typed_key = ()
        for dim, key in zip(self.key_dimensions, keys):
            key_type = dim.type
            if key_type is None:
                typed_key += (key,)
            elif isinstance(key, slice):
                sl_vals = [key.start, key.stop, key.step]
                typed_key += (slice(*[key_type(el) if el is not None else None
                                      for el in sl_vals]),)
            elif key is Ellipsis:
                typed_key += (key,)
            elif isinstance(key, list):
                typed_key += ([key_type(k) for k in key],)
            else:
                typed_key += (key_type(key),)
        return typed_key

    def _split_index(self, key):
        """
        Partitions key into key and deep dimension groups. If only key
        indices are supplied, the data is indexed with an empty tuple.
        """
        if not isinstance(key, tuple):
            key = (key,)
        map_slice = key[:self.ndims]
        if self._check_key_type:
            map_slice = self._apply_key_type(map_slice)
        if len(key) == self.ndims:
            return map_slice, ()
        else:
            return map_slice, key[self.ndims:]

    def _dataslice(self, data, indices):
        """
        Returns slice of data element if the item is deep
        indexable. Warns if attempting to slice an object that has not
        been declared deep indexable.
        """
        if isinstance(data, Dimensioned):
            return data[indices]
        elif len(indices) > 0:
            self.warning('Cannot index into data element, extra data'
                         ' indices ignored.')
        return data

    def _resort(self):
        """
        Sorts data by key using usual Python tuple sorting semantics
        or sorts in categorical order for any categorical Dimensions.
        """
        sortkws = {}
        dimensions = self.key_dimensions
        if self._cached_categorical:
            # Dimensions that declare values sort by the position of the
            # key component in that declaration; the rest sort naturally.
            sortkws['key'] = lambda x: tuple(
                dimensions[i].values.index(x[0][i])
                if dimensions[i].values else x[0][i]
                for i in range(self.ndims))
        self.data = OrderedDict(sorted(self.data.items(), **sortkws))

    def groupby(self, dimensions, container_type=None, group_type=None, **kwargs):
        """
        Splits the mapping into groups by key dimension which are then
        returned together in a mapping of class container_type. The
        individual groups are of the same type as the original map.

        A one-dimensional mapping cannot be split; a warning is issued
        and the mapping itself is returned.
        """
        if self.ndims == 1:
            self.warning('Cannot split Map with only one dimension.')
            return self

        container_type = container_type if container_type else type(self)
        group_type = group_type if group_type else type(self)
        dims, inds = zip(*((self.get_dimension(dim), self.get_dimension_index(dim))
                           for dim in dimensions))
        # Dimensions that remain on each group after the split.
        inames, idims = zip(*((dim.name, dim) for dim in self.key_dimensions
                              if dim.name not in dimensions))
        selects = unique_iterator(itemgetter(*inds)(key) if len(inds) > 1
                                  else (key[inds[0]],)
                                  for key in self.data.keys())
        groups = [(sel, group_type(self.select(**dict(zip(dimensions, sel)))
                                   .reindex(inames), **kwargs))
                  for sel in selects]
        return container_type(groups, key_dimensions=dims)

    def add_dimension(self, dimension, dim_pos, dim_val, **kwargs):
        """
        Create a new object with an additional key dimensions along
        which items are indexed. Requires the dimension name, the
        desired position in the key_dimensions and a key value that
        will be used across the dimension. This is particularly useful
        for merging several mappings together.

        Raises an Exception if a dimension of that name already exists.
        """
        if isinstance(dimension, str):
            dimension = Dimension(dimension)

        if dimension.name in self._cached_index_names:
            raise Exception('{dim} dimension already defined'.format(dim=dimension.name))

        dimensions = self.key_dimensions[:]
        dimensions.insert(dim_pos, dimension)

        items = OrderedDict()
        for key, val in self.data.items():
            new_key = list(key)
            new_key.insert(dim_pos, dim_val)
            items[tuple(new_key)] = val

        return self.clone(items, key_dimensions=dimensions, **kwargs)

    def drop_dimension(self, dim):
        """
        Returns a new mapping with the named dimension removed.

        This simply reindexes over the remaining dimensions, so reindex
        raises an Exception if the remaining key dimensions no longer
        address every value uniquely.
        """
        dim_labels = [d for d in self._cached_index_names if d != dim]
        return self.reindex(dim_labels)

    def dimension_values(self, dimension):
        "Returns the values along the specified dimension."
        all_dims = [d.name for d in self.dimensions()]
        if isinstance(dimension, int):
            dimension = all_dims[dimension]

        if dimension in self._cached_index_names:
            # Key dimension: read the matching component of every key.
            values = [k[self.get_dimension_index(dimension)]
                      for k in self.data.keys()]
        elif dimension in all_dims:
            # Otherwise delegate to the contained elements and concatenate.
            values = [el.dimension_values(dimension) for el in self
                      if dimension in el.dimensions()]
            values = np.concatenate(values)
        else:
            raise Exception('Dimension %s not found.' % dimension)
        return values

    def reindex(self, dimension_labels=None, force=False):
        """
        Create a new object with a re-ordered or reduced set of key
        dimensions.

        Reducing the number of key dimensions will discard information
        from the keys. Unless ``force`` is set, the new labels must be
        sufficient to address each value uniquely, otherwise an
        Exception is raised; with ``force`` later values replace earlier
        ones that share a reduced key.

        If no labels are supplied, every key dimension holding a single
        distinct value is dropped.
        """
        if dimension_labels is None or not len(dimension_labels):
            dimension_labels = [d for d in self._cached_index_names
                                if not len(set(self.dimension_values(d))) == 1]

        indices = [self.get_dimension_index(el) for el in dimension_labels]

        keys = [tuple(k[i] for i in indices) for k in self.data.keys()]
        reindexed_items = OrderedDict(
            (k, v) for (k, v) in zip(keys, self.data.values()))
        reduced_dims = set(self._cached_index_names).difference(dimension_labels)
        dimensions = [self.get_dimension(d) for d in dimension_labels
                      if d not in reduced_dims]

        if len(set(keys)) != len(keys) and not force:
            raise Exception("Given dimension labels not sufficient to address all values uniquely")

        if len(keys):
            # Remember the (first) value of each dropped dimension.
            constant_dimensions = {self.get_dimension(d): self.dimension_values(d)[0]
                                   for d in reduced_dims}
        else:
            constant_dimensions = {}
        return self.clone(reindexed_items, key_dimensions=dimensions,
                          constant_dimensions=constant_dimensions)

    @property
    def last(self):
        "Returns the last data item in the map's order, or None if empty."
        return list(self.data.values())[-1] if len(self) else None

    @property
    def last_key(self):
        "Returns the last key value, or None if the map is empty."
        return list(self.keys())[-1] if len(self) else None

    @property
    def info(self):
        """
        Prints information about the Dimensioned object, including the
        number and type of objects contained within it and information
        about its dimensions.
        """
        values = self.values()
        # An empty mapping has no first element to take the type from.
        # NOTE(review): self.range() on an empty mapping is not visible
        # here; confirm it copes with having no data.
        type_name = type(values[0]).__name__ if values else 'None'
        info_str = self.__class__.__name__ +\
                   " containing %d items of type %s\n" % (len(self.keys()),
                                                          type_name)
        info_str += ('-' * (len(info_str)-1)) + "\n\n"
        for group in self._dim_groups:
            dimensions = getattr(self, group)
            if dimensions:
                info_str += '%s Dimensions: \n' % group.capitalize()
            for d in dimensions:
                dmin, dmax = self.range(d.name)
                if d.formatter:
                    dmin, dmax = d.formatter(dmin), d.formatter(dmax)
                info_str += '\t %s: %s...%s \n' % (str(d), dmin, dmax)
        print(info_str)

    def table(self, **kwargs):
        "Creates a table from the stored keys and data."
        table = None
        for key, value in self.data.items():
            value = value.table(**kwargs)
            # Re-attach the key as leading dimensions of each sub-table.
            for idx, (dim, val) in enumerate(zip(self.key_dimensions, key)):
                value = value.add_dimension(dim, idx, val)
            if table is None:
                table = value
            else:
                table.update(value)
        return table

    def dframe(self):
        "Creates a pandas DataFrame from the stored keys and data."
        try:
            import pandas
        except ImportError:
            raise Exception("Cannot build a DataFrame without the pandas library.")
        # One column per key dimension plus one, named after the group,
        # holding the values.
        labels = self._cached_index_names + [self.group]
        return pandas.DataFrame(
            [dict(zip(labels, k + (v,))) for (k, v) in self.data.items()])

    def update(self, other):
        """
        Updates the current mapping with some other mapping or
        OrderedDict instance, making sure that they are indexed along
        the same set of dimensions. The order of key_dimensions
        remains unchanged after the update.
        """
        if isinstance(other, NdMapping):
            if self.key_dimensions != other.key_dimensions:
                raise KeyError("Cannot update with NdMapping that has"
                               " a different set of key dimensions.")
        # Sort once at the end rather than after every insertion.
        for key, data in other.items():
            self._add_item(key, data, sort=False)
        self._resort()

    def keys(self):
        " Returns the keys of all the elements."
        if self.ndims == 1:
            # One-dimensional keys are unwrapped from their 1-tuples.
            return [k[0] for k in self.data.keys()]
        else:
            return list(self.data.keys())

    def values(self):
        " Returns the values of all the elements."
        return list(self.data.values())

    def items(self):
        "Returns all elements as a list in (key,value) format."
        return list(zip(self.keys(), self.values()))

    def get(self, key, default=None):
        "Standard get semantics for all mapping types"
        try:
            # A None key yields None (not the default), as before.
            if key is None:
                return None
            return self[key]
        except Exception:
            # Any lookup failure maps to the default; KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            return default

    def pop(self, key, default=None):
        "Standard pop semantics for all mapping types"
        if not isinstance(key, tuple):
            key = (key,)
        return self.data.pop(key, default)

    def __getitem__(self, key):
        """
        Allows multi-dimensional indexing in the order of the
        specified key dimensions, passing any additional indices to
        the data elements.
        """
        if key in [Ellipsis, ()]:
            return self
        map_slice, data_slice = self._split_index(key)
        return self._dataslice(self.data[map_slice], data_slice)

    def __setitem__(self, key, value):
        self._add_item(key, value)

    def __str__(self):
        return repr(self)

    def __iter__(self):
        # Iterates over the values, not the keys.
        return iter(self.values())

    def __contains__(self, key):
        if self.ndims == 1:
            return key in self.data.keys()
        else:
            return key in self.keys()

    def __len__(self):
        return len(self.data)
try:  # Python 3.3+: the container ABCs live in collections.abc
    from collections.abc import MutableSet as _MutableSet
    from collections.abc import Sequence as _Sequence
except ImportError:  # Python 2
    from collections import MutableSet as _MutableSet
    from collections import Sequence as _Sequence


class OrderedSet(_MutableSet, _Sequence):
    """
    An OrderedSet is a custom MutableSet that remembers its order, so that
    every entry has an index that can be looked up.

    Based on version written by Luminoso Technologies:
        https://github.com/LuminosoInsight/ordered-set

    Unlike that implementation, this class uses OrderedDict as storage
    and supports key removal. We drop support for indexing, and add support
    for fixed-size sets with maxlen parameter.

    With a small modification, this class can be made into an LRU cache.
    """

    def __init__(self, iterable=None, maxlen=None):
        """
        iterable: Optional initial items, added in iteration order
        maxlen:   Optional maximum size; once reached, adding a new item
                  evicts the oldest one
        """
        self._mapping = OrderedDict()
        self._maxlen = maxlen
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self._mapping)

    def __getitem__(self, index):
        """
        Get the item at a given index.

        If `index` is a slice, you will get back that slice of items. If it's
        the slice [:], exactly the same object is returned. (If you want an
        independent copy of an OrderedSet, use `OrderedSet.copy()`.)

        If `index` is an iterable, you'll get the OrderedSet of items
        corresponding to those indices. This is similar to NumPy's
        "fancy indexing".
        """
        if index == SLICE_ALL:
            return self
        elif hasattr(index, '__index__') or isinstance(index, slice):
            # Materialise the keys: on Python 3 keys() is a view and
            # cannot be subscripted.
            result = list(self._mapping)[index]
            if isinstance(result, list):
                return OrderedSet(result)
            else:
                return result
        elif isiterable(index):
            keys = list(self._mapping)
            return OrderedSet([keys[i] for i in index])
        else:
            raise TypeError("Don't know how to index an OrderedSet by %r" %
                            index)

    def copy(self):
        "Return an independent OrderedSet with the same items and maxlen."
        return OrderedSet(self, maxlen=self._maxlen)

    def __getstate__(self):
        if len(self) == 0:
            # The state can't be an empty list.
            # We need to return a truthy value, or else __setstate__ won't be run.
            #
            # This could have been done more gracefully by always putting the state
            # in a tuple, but this way is backwards- and forwards- compatible with
            # previous versions of OrderedSet.
            return (None,)
        else:
            return list(self)

    def __setstate__(self, state):
        # The state holds the items only, so maxlen is reset to None here.
        if state == (None,):
            self.__init__([])
        else:
            self.__init__(state)

    def __contains__(self, key):
        return key in self._mapping

    def add(self, key):
        """
        Add `key` as an item to this OrderedSet.

        If `key` is already present nothing changes (its position is
        kept). If the set is at `maxlen`, the oldest item is evicted
        first. Returns None.
        """
        if key not in self._mapping:
            if self._maxlen is None or len(self._mapping) < self._maxlen:
                self._mapping[key] = 1
            else:
                self._mapping.popitem(last=False)
                self._mapping[key] = 1
    append = add

    def discard(self, key):
        """
        Remove `key` if it is present; do nothing otherwise.

        The MutableSet contract requires discard not to raise for a
        missing key (`remove`, inherited from MutableSet, is the
        variant that raises KeyError).
        """
        self._mapping.pop(key, None)

    def __iter__(self):
        # Iterating the OrderedDict yields its keys on Python 2 and 3.
        return iter(self._mapping)

    def __reversed__(self):
        return reversed(self._mapping)

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """
        Two OrderedSets are equal when they hold the same items in the
        same order. Against any other iterable the comparison ignores
        order; a non-iterable is never equal.
        """
        if isinstance(other, OrderedSet):
            # Compare as lists: Python 3 key views compare like sets,
            # which would silently ignore the order.
            return list(self) == list(other)
        try:
            other_as_set = set(other)
        except TypeError:
            # If `other` can't be converted into a set, it's not equal.
            return False
        else:
            return set(self) == other_as_set
class AttrTree(object):
    """
    An AttrTree offers convenient, multi-level attribute access for
    collections of objects. AttrTree objects may also be combined
    together using the update method or merge classmethod. Here is an
    example of adding a ViewableElement to an AttrTree and accessing it:

    >>> t = AttrTree()
    >>> t.Example.Path = 1
    >>> t.Example.Path                             #doctest: +ELLIPSIS
    1
    """
    # Identifiers starting with one of these prefixes are used as-is by
    # __getattr__ instead of being passed through the sanitizer.
    _disabled_prefixes = []
    _sanitizer = util.sanitize_identifier

    @classmethod
    def merge(cls, trees):
        """
        Merge a collection of AttrTree objects.

        The first tree is updated in place with each of the others and
        then returned.
        """
        first = trees[0]
        # Skip the first tree itself: updating it with its own contents
        # is redundant and would call update() on its own leaves.
        for tree in trees[1:]:
            first.update(tree)
        return first

    def __dir__(self):
        """
        The _dir_mode may be set to 'default' or 'user' in which case
        only the child nodes added by the user are listed.
        """
        dict_keys = self.__dict__.keys()
        if self.__dict__['_dir_mode'] == 'user':
            return self.__dict__['children']
        else:
            return dir(type(self)) + list(dict_keys)

    def __init__(self, items=None, identifier=None, parent=None, dir_mode='default'):
        """
        identifier: A string identifier for the current node (if any)
        parent:     The parent node (if any)
        items:      Items as (path, value) pairs to construct
                    (sub)tree down to given leaf values.

        Note that the root node does not have a parent and does not
        require an identifier.
        """
        # Internal state goes straight into __dict__ so that the custom
        # __setattr__ does not treat it as tree content.
        self.__dict__['parent'] = parent
        self.__dict__['identifier'] = type(self)._sanitizer(identifier, escape=False)
        self.__dict__['children'] = []
        self.__dict__['_fixed'] = False
        self.__dict__['_dir_mode'] = dir_mode  # Either 'default' or 'user'

        fixed_error = 'No attribute %r in this AttrTree, and none can be added because fixed=True'
        self.__dict__['_fixed_error'] = fixed_error
        self.__dict__['data'] = OrderedDict()
        items = items.items() if isinstance(items, OrderedDict) else items
        # Python 3
        items = list(items) if items else items
        items = [] if not items else items
        for path, item in items:
            self.set_path(path, item)

    @property
    def path(self):
        "Returns the path up to the root for the current node."
        if self.parent:
            return '.'.join([self.parent.path, str(self.identifier)])
        else:
            return self.identifier if self.identifier else self.__class__.__name__

    @property
    def fixed(self):
        "If fixed, no new paths can be created via attribute access"
        return self.__dict__['_fixed']

    @fixed.setter
    def fixed(self, val):
        self.__dict__['_fixed'] = val

    def update(self, other):
        """
        Updates the contents of the current AttrTree with the
        contents of a second AttrTree.

        Both trees are temporarily unfixed while updating; their
        original fixed status is restored afterwards, even if the
        update fails part-way.
        """
        if not isinstance(other, AttrTree):
            raise Exception('Can only update with another AttrTree type.')
        fixed_status = (self.fixed, other.fixed)
        (self.fixed, other.fixed) = (False, False)
        try:
            for identifier, element in other.items():
                if identifier not in self.data:
                    self[identifier] = element
                else:
                    self[identifier].update(element)
        finally:
            (self.fixed, other.fixed) = fixed_status

    def set_path(self, path, val):
        """
        Set the given value at the supplied path where path is either
        a tuple of strings or a string in A.B.C format.
        """
        path = tuple(path.split('.')) if isinstance(path, str) else tuple(path)

        disallowed = [p for p in path if not type(self)._sanitizer.allowable(p)]
        if any(disallowed):
            raise Exception("Attribute strings in path elements cannot be "
                            "correctly escaped : %s" % ','.join(repr(el) for el in disallowed))
        if len(path) > 1:
            # Descend (creating the child if needed) and recurse on the rest.
            attrtree = self.__getattr__(path[0])
            attrtree.set_path(path[1:], val)
        else:
            self.__setattr__(path[0], val)

    def filter(self, path_filters):
        """
        Filters the loaded AttrTree using the supplied path_filters.

        A path is kept when every element of at least one filter occurs
        in it. With no filters the tree itself is returned.
        """
        if not path_filters:
            return self

        # Convert string path filters
        path_filters = [tuple(pf.split('.')) if not isinstance(pf, tuple)
                        else pf for pf in path_filters]

        # Search for substring matches between paths and path filters
        new_attrtree = self.__class__()
        for path, item in self.data.items():
            if any(all(subpath in path for subpath in pf)
                   for pf in path_filters):
                new_attrtree.set_path(path, item)

        return new_attrtree

    def _propagate(self, path, val):
        """
        Propagate the value up to the root node.

        The string '_DELETE' is a reserved marker: it removes the path
        (or every entry under it) instead of storing a value.
        """
        # Only compare actual strings with the marker; comparing an
        # arbitrary value (e.g. an array) could be ambiguous or costly.
        if isinstance(val, str) and val == '_DELETE':
            if path in self.data:
                del self.data[path]
            else:
                # Drop every stored path that starts with this one.
                items = [(key, v) for key, v in self.data.items()
                         if not all(k == p for k, p in zip(key, path))]
                self.data = OrderedDict(items)
        else:
            self.data[path] = val
        if self.parent is not None:
            self.parent._propagate((self.identifier,) + path, val)

    def __setitem__(self, identifier, val):
        """
        Set a value at a child node with given identifier. If at a root
        node, multi-level path specifications is allowed (i.e. 'A.B.C'
        format or tuple format) in which case the behaviour matches
        that of set_path.
        """
        if isinstance(identifier, str) and '.' not in identifier:
            self.__setattr__(identifier, val)
        elif isinstance(identifier, str) and self.parent is None:
            self.set_path(tuple(identifier.split('.')), val)
        elif isinstance(identifier, tuple) and self.parent is None:
            self.set_path(identifier, val)
        else:
            raise Exception("Multi-level item setting only allowed from root node.")

    def __getitem__(self, identifier):
        """
        For a given non-root node, access a child element by identifier.

        If the node is a root node, you may also access elements using
        either tuple format or the 'A.B.C' string format.

        Raises KeyError if the identifier does not name a child.
        """
        split_label = (tuple(identifier.split('.'))
                       if isinstance(identifier, str) else tuple(identifier))
        if len(split_label) == 1:
            identifier = split_label[0]
            if identifier in self.children:
                return self.__dict__[identifier]
            else:
                raise KeyError(identifier)
        path_item = self
        for identifier in split_label:
            path_item = path_item[identifier]
        return path_item

    def __delitem__(self, identifier):
        """
        Delete the child (or, for a multi-level path, the descendant)
        named by identifier and remove its entries from the data of all
        ancestor nodes. Raises KeyError if it does not exist.
        """
        split_label = (tuple(identifier.split('.'))
                       if isinstance(identifier, str) else tuple(identifier))
        if len(split_label) == 1:
            identifier = split_label[0]
            if identifier in self.children:
                del self.__dict__[identifier]
                self.children.remove(identifier)
            else:
                raise KeyError(identifier)
            self._propagate(split_label, '_DELETE')
        else:
            # Walk down to the parent of the target, then delete there.
            path_item = self
            for identifier in split_label[:-1]:
                path_item = path_item[identifier]
            del path_item[split_label[-1]]

    def __setattr__(self, identifier, val):
        # Getattr is skipped for root and first set of children
        shallow = (self.parent is None or self.parent.parent is None)
        if util.tree_attribute(identifier) and self.fixed and shallow:
            raise AttributeError(self._fixed_error % identifier)

        super(AttrTree, self).__setattr__(identifier, val)

        if util.tree_attribute(identifier):
            if identifier not in self.children:
                self.children.append(identifier)
            self._propagate((identifier,), val)

    def __getattr__(self, identifier):
        """
        Access a identifier from the AttrTree or generate a new AttrTree
        with the chosen attribute path.
        """
        # Give any cooperating base class the first chance to resolve
        # the attribute; an AttributeError falls through to the tree logic.
        try:
            return super(AttrTree, self).__getattr__(identifier)
        except AttributeError:
            pass

        # Attributes starting with __ get name mangled
        if identifier.startswith('_' + type(self).__name__) or identifier.startswith('__'):
            raise AttributeError('Attribute %s not found.' % identifier)
        elif self.fixed:
            raise AttributeError(self._fixed_error % identifier)

        if not any(identifier.startswith(prefix)
                   for prefix in type(self)._disabled_prefixes):
            sanitized = type(self)._sanitizer(identifier, escape=False)
        else:
            sanitized = identifier

        if sanitized in self.children:
            return self.__dict__[sanitized]

        if not sanitized.startswith('_') and util.tree_attribute(identifier):
            # Unknown tree attribute: create and register an empty subtree.
            self.children.append(sanitized)
            dir_mode = self.__dict__['_dir_mode']
            child_tree = self.__class__(identifier=sanitized,
                                        parent=self, dir_mode=dir_mode)
            self.__dict__[sanitized] = child_tree
            return child_tree
        else:
            raise AttributeError('%r object has no attribute %s.' %
                                 (type(self).__name__, identifier))

    def __iter__(self):
        return iter(self.data.values())

    def __contains__(self, name):
        return name in self.children or name in self.data

    def __len__(self):
        return len(self.data)

    def get(self, identifier, default=None):
        """Get a node of the AttrTree using its path string.

        Args:
            identifier: Path string of the node to return
            default: Value to return if no node is found

        Returns:
            The indexed node of the AttrTree
        """
        split_label = (tuple(identifier.split('.'))
                       if isinstance(identifier, str) else tuple(identifier))
        if len(split_label) == 1:
            identifier = split_label[0]
            return self.__dict__.get(identifier, default)
        path_item = self
        for identifier in split_label:
            # Stop as soon as a level of the path is missing.
            if path_item == default or path_item is None:
                return default
            path_item = path_item.get(identifier, default)
        return path_item

    def keys(self):
        "Keys of nodes in the AttrTree"
        return list(self.data.keys())

    def items(self):
        "Keys and nodes of the AttrTree"
        return list(self.data.items())

    def values(self):
        "Nodes of the AttrTree"
        return list(self.data.values())

    def pop(self, identifier, default=None):
        """Pop a node of the AttrTree using its path string.

        Args:
            identifier: Path string of the node to return
            default: Value to return if no node is found

        Returns:
            The node that was removed from the AttrTree
        """
        if identifier in self.children:
            item = self[identifier]
            self.__delitem__(identifier)
            return item
        else:
            return default

    def __repr__(self):
        return PrettyPrinter.pprint(self)
class AttrTree(object):
    """
    An AttrTree offers convenient, multi-level attribute access for
    collections of objects. AttrTree objects may also be combined
    together using the update method or merge classmethod. Here is an
    example of adding a ViewableElement to an AttrTree and accessing it:

    >>> t = AttrTree()
    >>> t.Example.Path = 1
    >>> t.Example.Path #doctest: +ELLIPSIS
    1

    Internal state (parent, identifier, children, data, ...) is written
    straight into ``__dict__`` so that it bypasses the custom
    ``__setattr__``/``__getattr__`` hooks defined below. ``data`` maps
    path tuples to leaf values for everything at or below this node.
    """
    # Identifier prefixes that __getattr__ passes through without sanitizing
    _disabled_prefixes = []
    _sanitizer = util.sanitize_identifier

    @classmethod
    def merge(cls, trees):
        """
        Merge a collection of AttrTree objects.

        The first tree is updated in place with the contents of the
        others and then returned. ``trees`` must be indexable and
        non-empty (an empty sequence raises IndexError).
        """
        first = trees[0]
        for tree in trees:
            # Updating a tree with itself would call update() on each of
            # its own leaves, which fails for leaves without that method.
            if tree is first:
                continue
            first.update(tree)
        return first

    def __dir__(self):
        """
        The _dir_mode may be set to 'default' or 'user' in which case
        only the child nodes added by the user are listed.
        """
        if self.__dict__['_dir_mode'] == 'user':
            return self.__dict__['children']
        return dir(type(self)) + list(self.__dict__.keys())

    def __init__(self, items=None, identifier=None, parent=None, dir_mode='default'):
        """
        identifier: A string identifier for the current node (if any)
        parent: The parent node (if any)
        items: Items as (path, value) pairs to construct
               (sub)tree down to given leaf values.

        Note that the root node does not have a parent and does not
        require an identifier.
        """
        self.__dict__['parent'] = parent
        self.__dict__['identifier'] = type(self)._sanitizer(identifier, escape=False)
        self.__dict__['children'] = []
        self.__dict__['_fixed'] = False
        self.__dict__['_dir_mode'] = dir_mode  # Either 'default' or 'user'

        fixed_error = 'No attribute %r in this AttrTree, and none can be added because fixed=True'
        self.__dict__['_fixed_error'] = fixed_error
        self.__dict__['data'] = OrderedDict()

        if isinstance(items, OrderedDict):
            items = items.items()
        # Materialize views/iterators (Python 3); anything falsy means "no items"
        items = list(items) if items else []
        for path, item in items:
            self.set_path(path, item)

    @property
    def path(self):
        "Returns the path up to the root for the current node."
        # NOTE(review): the truthiness test goes through __len__, so a
        # parent holding no leaves yet is treated like a missing parent.
        if self.parent:
            return '.'.join([self.parent.path, str(self.identifier)])
        else:
            return self.identifier if self.identifier else self.__class__.__name__

    @property
    def fixed(self):
        "If fixed, no new paths can be created via attribute access"
        return self.__dict__['_fixed']

    @fixed.setter
    def fixed(self, val):
        self.__dict__['_fixed'] = val

    def update(self, other):
        """
        Update the contents of the current AttrTree with the
        contents of a second AttrTree.

        Paths missing from this tree are added; for paths present in
        both, the existing leaf's own ``update`` method is called with
        the incoming element. Raises Exception if ``other`` is not an
        AttrTree.
        """
        if not isinstance(other, AttrTree):
            raise Exception('Can only update with another AttrTree type.')
        fixed_status = (self.fixed, other.fixed)
        (self.fixed, other.fixed) = (False, False)
        try:
            for identifier, element in other.items():
                if identifier not in self.data:
                    self[identifier] = element
                else:
                    self[identifier].update(element)
        finally:
            # Restore the fixed flags even if an assignment or leaf update fails
            (self.fixed, other.fixed) = fixed_status

    def set_path(self, path, val):
        """
        Set the given value at the supplied path where path is either
        a tuple of strings or a string in A.B.C format.

        Raises Exception if any path element is rejected by the
        sanitizer's ``allowable`` check.
        """
        path = tuple(path.split('.')) if isinstance(path, str) else tuple(path)

        disallowed = [p for p in path if not type(self)._sanitizer.allowable(p)]
        if any(disallowed):
            raise Exception("Attribute strings in path elements cannot be "
                            "correctly escaped : %s" % ','.join(repr(el) for el in disallowed))
        if len(path) > 1:
            # Descend (creating the child node if needed) and recurse
            attrtree = self.__getattr__(path[0])
            attrtree.set_path(path[1:], val)
        else:
            self.__setattr__(path[0], val)

    def filter(self, path_filters):
        """
        Filters the loaded AttrTree using the supplied path_filters.

        Each filter is a tuple of path components or a dotted string.
        A stored path is kept when every component of at least one
        filter occurs in it. Returns self unchanged when no filters are
        given, otherwise a new tree of the same class.
        """
        if not path_filters:
            return self

        # Convert string path filters
        path_filters = [pf if isinstance(pf, tuple) else tuple(pf.split('.'))
                        for pf in path_filters]

        # Search for matches between paths and path filters
        new_attrtree = self.__class__()
        for path, item in self.data.items():
            if any(all(subpath in path for subpath in pf) for pf in path_filters):
                new_attrtree.set_path(path, item)

        return new_attrtree

    def _propagate(self, path, val):
        """
        Propagate the value up to the root node.

        Records ``val`` under ``path`` in this node's ``data`` and then
        repeats on the parent with this node's identifier prepended.
        The sentinel value '_DELETE' removes entries instead.
        """
        if val == '_DELETE':
            if path in self.data:
                del self.data[path]
            else:
                # Not a leaf path: drop every entry that starts with it
                items = [(key, v) for key, v in self.data.items()
                         if not all(k == p for k, p in zip(key, path))]
                self.data = OrderedDict(items)
        else:
            self.data[path] = val
        if self.parent is not None:
            self.parent._propagate((self.identifier,) + path, val)

    def __setitem__(self, identifier, val):
        """
        Set a value at a child node with given identifier. If at a root
        node, multi-level path specifications is allowed (i.e. 'A.B.C'
        format or tuple format) in which case the behaviour matches
        that of set_path.
        """
        if isinstance(identifier, str) and '.' not in identifier:
            self.__setattr__(identifier, val)
        elif isinstance(identifier, str) and self.parent is None:
            self.set_path(tuple(identifier.split('.')), val)
        elif isinstance(identifier, tuple) and self.parent is None:
            self.set_path(identifier, val)
        else:
            raise Exception("Multi-level item setting only allowed from root node.")

    def __getitem__(self, identifier):
        """
        For a given non-root node, access a child element by identifier.

        If the node is a root node, you may also access elements using
        either tuple format or the 'A.B.C' string format.

        Raises KeyError when a path component is not a child.
        """
        split_label = (tuple(identifier.split('.'))
                       if isinstance(identifier, str) else tuple(identifier))
        if len(split_label) == 1:
            identifier = split_label[0]
            if identifier in self.children:
                return self.__dict__[identifier]
            raise KeyError(identifier)
        path_item = self
        for identifier in split_label:
            path_item = path_item[identifier]
        return path_item

    def __delitem__(self, identifier):
        """
        Delete the child (or nested path) named by identifier, given
        either as a tuple or in 'A.B.C' string format.

        Raises KeyError when the final component is not a child.
        """
        split_label = (tuple(identifier.split('.'))
                       if isinstance(identifier, str) else tuple(identifier))
        if len(split_label) == 1:
            identifier = split_label[0]
            if identifier not in self.children:
                raise KeyError(identifier)
            del self.__dict__[identifier]
            self.children.remove(identifier)
            self._propagate(split_label, '_DELETE')
        else:
            # Walk down to the owner of the last component and delete there
            path_item = self
            for identifier in split_label[:-1]:
                path_item = path_item[identifier]
            del path_item[split_label[-1]]

    def __setattr__(self, identifier, val):
        """
        Set an attribute; identifiers accepted by util.tree_attribute
        are additionally registered as children and propagated into
        ``data`` up to the root.
        """
        # Getattr is skipped for root and first set of children
        shallow = (self.parent is None or self.parent.parent is None)

        if util.tree_attribute(identifier) and self.fixed and shallow:
            raise AttributeError(self._fixed_error % identifier)

        super(AttrTree, self).__setattr__(identifier, val)

        if util.tree_attribute(identifier):
            if identifier not in self.children:
                self.children.append(identifier)
            self._propagate((identifier,), val)

    def __getattr__(self, identifier):
        """
        Access an identifier from the AttrTree or generate a new AttrTree
        with the chosen attribute path.
        """
        # Give any __getattr__ further up the MRO the first chance
        try:
            return super(AttrTree, self).__getattr__(identifier)
        except AttributeError:
            pass

        # Attributes starting with __ get name mangled
        if identifier.startswith('_' + type(self).__name__) or identifier.startswith('__'):
            raise AttributeError('Attribute %s not found.' % identifier)
        elif self.fixed == True:
            raise AttributeError(self._fixed_error % identifier)

        if not any(identifier.startswith(prefix)
                   for prefix in type(self)._disabled_prefixes):
            sanitized = type(self)._sanitizer(identifier, escape=False)
        else:
            sanitized = identifier

        if sanitized in self.children:
            return self.__dict__[sanitized]

        if not sanitized.startswith('_') and util.tree_attribute(identifier):
            # Auto-create an intermediate node for the new path component
            self.children.append(sanitized)
            dir_mode = self.__dict__['_dir_mode']
            child_tree = self.__class__(identifier=sanitized,
                                        parent=self, dir_mode=dir_mode)
            self.__dict__[sanitized] = child_tree
            return child_tree
        else:
            raise AttributeError('%r object has no attribute %s.' %
                                 (type(self).__name__, identifier))

    def __iter__(self):
        "Iterate over the leaf values stored in data"
        return iter(self.data.values())

    def __contains__(self, name):
        "True if name is a direct child or a stored path tuple"
        return name in self.children or name in self.data

    def __len__(self):
        "Number of entries stored in data"
        return len(self.data)

    def get(self, identifier, default=None):
        """Get a node of the AttrTree using its path string.

        Args:
            identifier: Path string of the node to return
            default: Value to return if no node is found

        Returns:
            The indexed node of the AttrTree
        """
        split_label = (tuple(identifier.split('.'))
                       if isinstance(identifier, str) else tuple(identifier))
        if len(split_label) == 1:
            identifier = split_label[0]
            return self.__dict__.get(identifier, default)
        path_item = self
        for identifier in split_label:
            # Stop descending as soon as a lookup fell back to the default
            if path_item == default or path_item is None:
                return default
            path_item = path_item.get(identifier, default)
        return path_item

    def keys(self):
        "Keys of nodes in the AttrTree"
        return list(self.data.keys())

    def items(self):
        "Keys and nodes of the AttrTree"
        return list(self.data.items())

    def values(self):
        "Nodes of the AttrTree"
        return list(self.data.values())

    def pop(self, identifier, default=None):
        """Pop a node of the AttrTree using its path string.

        Args:
            identifier: Path string of the node to return
            default: Value to return if no node is found

        Returns:
            The node that was removed from the AttrTree
        """
        # Only direct children are matched here
        if identifier in self.children:
            item = self[identifier]
            self.__delitem__(identifier)
            return item
        else:
            return default

    def __repr__(self):
        return PrettyPrinter.pprint(self)
try:
    from collections.abc import MutableSet as _MutableSet, Sequence as _Sequence
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import MutableSet as _MutableSet, Sequence as _Sequence


class OrderedSet(_MutableSet, _Sequence):
    """
    An OrderedSet is a custom MutableSet that remembers its order, so
    that every entry has a position that can be looked up.

    Based on version written by Luminoso Technologies:
        https://github.com/LuminosoInsight/ordered-set

    Unlike that implementation, this class uses OrderedDict as storage
    and supports key removal. There is no separate positional index:
    ``__getitem__`` materializes the key list on every call. Fixed-size
    sets are supported through the ``maxlen`` parameter; once full, adding
    a new key evicts the oldest one.

    With a small modification, this class can be made into an LRU cache.
    """

    def __init__(self, iterable=None, maxlen=None):
        """
        iterable: optional initial items, added in iteration order
        maxlen: optional maximum number of items to retain
        """
        self._mapping = OrderedDict()
        self._maxlen = maxlen
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self._mapping)

    def __getitem__(self, index):
        """
        Get the item at a given index.

        If `index` is a slice, you will get back that slice of items. If it's
        the slice [:], exactly the same object is returned. (If you want an
        independent copy of an OrderedSet, use `OrderedSet.copy()`.)

        If `index` is an iterable, you'll get the OrderedSet of items
        corresponding to those indices. This is similar to NumPy's
        "fancy indexing".
        """
        if index == SLICE_ALL:
            return self
        elif hasattr(index, '__index__') or isinstance(index, slice):
            # list(...) rather than keys()[...]: key views are not indexable on Python 3
            result = list(self._mapping)[index]
            if isinstance(result, list):
                return OrderedSet(result)
            else:
                return result
        elif isiterable(index):
            keys = list(self._mapping)
            return OrderedSet([keys[i] for i in index])
        else:
            raise TypeError("Don't know how to index an OrderedSet by %r" %
                            (index,))

    def copy(self):
        "Return a new OrderedSet with the same items (maxlen is not carried over)"
        return OrderedSet(self)

    def __getstate__(self):
        if len(self) == 0:
            # The state can't be an empty list.
            # We need to return a truthy value, or else __setstate__ won't be run.
            #
            # This could have been done more gracefully by always putting the state
            # in a tuple, but this way is backwards- and forwards- compatible with
            # previous versions of OrderedSet.
            return (None, )
        else:
            return list(self)

    def __setstate__(self, state):
        if state == (None, ):
            self.__init__([])
        else:
            self.__init__(state)

    def __contains__(self, key):
        return key in self._mapping

    def add(self, key):
        """
        Add `key` as an item to this OrderedSet.

        If `key` is already present nothing changes (its position is
        kept). When `maxlen` items are already stored, the oldest item
        is evicted to make room. Returns None.
        """
        if key in self._mapping:
            return
        if self._maxlen is not None and len(self._mapping) >= self._maxlen:
            if not self._mapping:
                # maxlen <= 0: there is nothing to evict and no room to store
                return
            self._mapping.popitem(last=False)
        self._mapping[key] = 1

    append = add

    def discard(self, key):
        "Remove `key` if present; a missing key is silently ignored"
        self._mapping.pop(key, None)

    def __iter__(self):
        return iter(self._mapping)

    def __reversed__(self):
        return reversed(self._mapping)

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__, )
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """
        Compare order-sensitively against another OrderedSet, and as a
        plain set against any other iterable. Non-iterables are unequal.
        """
        if isinstance(other, OrderedSet):
            # Compare as lists: key views compare like unordered sets on Python 3
            return len(self) == len(other) and \
                list(self._mapping) == list(other._mapping)
        try:
            other_as_set = set(other)
        except TypeError:
            # If `other` can't be converted into a set, it's not equal.
            return False
        else:
            return set(self) == other_as_set